diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6ec75a7f..445aeaeb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,6 +44,8 @@ jobs: uses: sbt/setup-sbt@v1 - name: Set vm.max_map_count run: sudo sysctl -w vm.max_map_count=262144 + - name: Cross Compile + run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g -Dfile.encoding=UTF-8" sbt + compile - name: Run tests run: SBT_OPTS="-Xss4M -Xms1g -Xmx4g -Dfile.encoding=UTF-8" sbt compile test @@ -65,5 +67,5 @@ jobs: # cache: 'sbt' - name: Setup sbt launcher uses: sbt/setup-sbt@v1 - - name: Formatting - run: sbt scalafmtSbtCheck scalafmtCheck test:scalafmtCheck \ No newline at end of file + - name: Checks + run: sbt headerCheck scalafmtSbtCheck scalafmtCheck test:scalafmtCheck \ No newline at end of file diff --git a/README.md b/README.md index 5a424715..ded584de 100644 --- a/README.md +++ b/README.md @@ -11,16 +11,17 @@ ## Key Features **Unified Elasticsearch API** -This project provides a trait-based interface (`ElasticClientApi`) that aggregates the core functionalities of Elasticsearch: indexing, searching, updating, deleting, mapping, aliases, refreshing, and more. This design abstracts the underlying client implementation and ensures compatibility across different Elasticsearch versions. +This project provides a trait-based interface (`ElasticClientApi`) that aggregates the core functionalities of Elasticsearch: [indexing](documentation/client/index.md), [updating](documentation/client/update.md), [deleting](documentation/client/delete.md), [bulk](documentation/client/bulk.md), [searching](documentation/client/search.md), [scrolling](documentation/client/scroll.md), [mapping](documentation/client/mappings.md), [aliases](documentation/client/aliases.md), [refreshing](documentation/client/refresh.md), and [more](documentation/client/README.md). +This design abstracts the underlying client implementation and ensures compatibility across different Elasticsearch versions. -- `JestClientApi`: For Elasticsearch 6 using the open-source [Jest client](https://github.com/searchbox-io/Jest). +- `JavaClientApi`: For Elasticsearch 8 and 9 using the official Java client. - `RestHighLevelClientApi`: For Elasticsearch 6 and 7 using the official high-level REST client. -- `ElasticsearchClientApi`: For Elasticsearch 8 and 9 using the official Java client. +- `JestClientApi`: For Elasticsearch 6 using the open-source [Jest client](https://github.com/searchbox-io/Jest). By relying on these concrete implementations, developers can switch between versions with minimal changes to their business logic. **SQL to Elasticsearch Query Translation** -Elastic Client includes a parser capable of translating SQL `SELECT` queries into Elasticsearch queries. The parser produces an intermediate representation, which is then converted into [Elastic4s](https://github.com/sksamuel/elastic4s) DSL queries and ultimately into native Elasticsearch queries. This allows data engineers and analysts to express queries in familiar [SQL](documentation/README.md) syntax. +Elastic Client includes a parser capable of translating SQL `SELECT` queries into Elasticsearch queries. The parser produces an intermediate representation, which is then converted into [Elastic4s](https://github.com/sksamuel/elastic4s) DSL queries and ultimately into native Elasticsearch queries. This allows data engineers and analysts to express queries in familiar [SQL](documentation/sql/README.md) syntax. 
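As a quick illustration, here is a minimal sketch of the caller's side, assuming `ElasticClientApi` mixes in the `AggregateApi` introduced later in this diff (the `client` wiring itself is hypothetical):

```scala
import app.softnetwork.elastic.sql.query.SQLQuery
import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticSuccess}
import scala.concurrent.ExecutionContext.Implicits.global

val client: ElasticClientApi = ??? // hypothetical: any of the implementations listed above

// Parsed into an intermediate representation, then into an Elastic4s/native query
val query = SQLQuery("SELECT AVG(age) FROM users WHERE active = true")

client.aggregate(query).foreach {
  case ElasticSuccess(results) => results.foreach(r => println(r.prettyPrint))
  case ElasticFailure(error)   => println(s"Query failed: ${error.message}")
}
```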
**Dynamic Mapping Migration** Elastic Client provides tools to analyze and compare existing mappings with new ones. If differences are detected, it can automatically perform safe migrations. This includes creating temporary indices, reindexing, and renaming — all while preserving data integrity. This eliminates the need for manual mapping migrations and reduces downtime. @@ -28,12 +29,16 @@ Elastic Client provides tools to analyze and compare existing mappings with new **High-Performance Bulk API with Akka Streams** Bulk operations leverage the power of Akka Streams to efficiently process and index large volumes of data. This stream-based approach improves performance, resilience, and backpressure handling, especially for real-time or high-throughput indexing scenarios. +**Scroll API with Automatic Scroll Strategy Detection** +The Scroll API is also integrated with Akka Streams, enabling efficient retrieval of large datasets in a streaming fashion. This allows applications to process search results incrementally, reducing memory consumption and improving responsiveness. +It automatically selects the optimal scrolling strategy (PIT + search_after, search_after, or classic scroll) based on your query and Elasticsearch version. + **Akka Persistence Integration** The project offers seamless integration with Akka Persistence. This enables Elasticsearch indices to be updated reactively based on persistent events, offering a robust pattern for event-sourced systems. ## Roadmap -Future enhancements include expanding the SQL parser to support additional operations such as `INSERT`, `UPDATE`, and `DELETE`. The long-term vision is to deliver a fully functional, open-source **JDBC connector for Elasticsearch**, empowering users to interact with their data using standard SQL tooling. +Future enhancements include expanding the SQL parser to support additional operations such as `CREATE`, `ALTER`, `INSERT`, `UPDATE`, and `DELETE`. The long-term vision is to deliver a fully functional, open-source **JDBC connector for Elasticsearch**, empowering users to interact with their data using standard SQL tooling.
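As a sketch of the streaming bulk API described under Key Features above (illustrative only: the `BulkOptions` constructor shown here is an assumption, since this diff only references fields such as `defaultIndex`, and `client` is again a hypothetical instance of the unified API):

```scala
import akka.actor.ActorSystem
import scala.concurrent.ExecutionContext

implicit val system: ActorSystem = ActorSystem("bulk-indexing")
implicit val ec: ExecutionContext = system.dispatcher
implicit val options: BulkOptions = BulkOptions(defaultIndex = "users") // assumed constructor

val docs: Iterator[String] = Iterator(
  """{"id": "1", "name": "Jane"}""",
  """{"id": "2", "name": "John"}"""
)

// Documents are streamed through Akka Streams; the result separates successes from failures
client.bulkWithResult(docs, identity, idKey = Some("id")).foreach { result =>
  println(s"indexed=${result.successCount}, failed=${result.failedCount}")
}
```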
## License diff --git a/build.sbt b/build.sbt index 3712b2b9..83047602 100644 --- a/build.sbt +++ b/build.sbt @@ -19,7 +19,7 @@ ThisBuild / organization := "app.softnetwork" name := "softclient4es" -ThisBuild / version := "0.9.3" +ThisBuild / version := "0.10.0" ThisBuild / scalaVersion := scala213 @@ -54,6 +54,22 @@ lazy val moduleSettings = Seq( ThisBuild / javacOptions ++= Seq("-source", "1.8", "-target", "1.8") +ThisBuild / javaOptions ++= Seq( + "--add-opens=java.base/java.util=ALL-UNNAMED", + "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", + "--add-opens=java.base/java.lang=ALL-UNNAMED", + "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED", + "--add-opens=java.base/java.math=ALL-UNNAMED", + "--add-opens=java.base/java.io=ALL-UNNAMED", + "--add-opens=java.base/java.net=ALL-UNNAMED", + "--add-opens=java.base/java.nio=ALL-UNNAMED", + "--add-opens=java.base/java.text=ALL-UNNAMED", + "--add-opens=java.base/java.time=ALL-UNNAMED", + "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED" +) + +Test / javaOptions ++= (javaOptions.value) + ThisBuild / resolvers ++= Seq( "Softnetwork Server" at "https://softnetwork.jfrog.io/artifactory/releases/", "Softnetwork Snapshots" at "https://softnetwork.jfrog.io/artifactory/snapshots/", @@ -75,6 +91,7 @@ val json4s = Seq( ).map(_.excludeAll(jacksonExclusions: _*)) ThisBuild / libraryDependencies ++= Seq( + "org.scala-lang.modules" %% "scala-collection-compat" % "2.11.0", "org.scala-lang.modules" %% "scala-parser-combinators" % "1.1.2" ) // ++ configDependencies ++ json4s ++ logging @@ -101,6 +118,17 @@ lazy val core = project sql % "compile->compile;test->test;it->it" ) +lazy val persistence = project + .in(file("persistence")) + .configs(IntegrationTest) + .settings( + Defaults.itSettings, + moduleSettings + ) + .dependsOn( + core % "compile->compile;test->test;it->it" + ) + def copyTestkit(esVersion: String): Def.Initialize[Task[Unit]] = Def.task { val src = file("core/testkit") val target = baseDirectory.value @@ -136,7 +164,7 @@ def testkitProject(esVersion: String, ss: Def.SettingsDefinition*): Project = { .settings(ss: _*) .enablePlugins(BuildInfoPlugin) .dependsOn( - core % "compile->compile;test->test;it->it" + persistence % "compile->compile;test->test;it->it" ) } @@ -383,6 +411,7 @@ lazy val root = project .aggregate( sql, core, + persistence, es6, es7, es8, diff --git a/core/build.sbt b/core/build.sbt index 0659cfdf..e1cd0f88 100644 --- a/core/build.sbt +++ b/core/build.sbt @@ -4,8 +4,21 @@ organization := "app.softnetwork.elastic" name := "softclient4es-core" -val configDependencies = Seq( - "com.typesafe" % "config" % Versions.typesafeConfig +val akka = Seq( + "com.typesafe.akka" %% "akka-actor" % Versions.akka, + "com.typesafe.akka" %% "akka-cluster-sharding-typed" % Versions.akka, + "com.typesafe.akka" %% "akka-slf4j" % Versions.akka, + "com.typesafe.akka" %% "akka-discovery" % Versions.akka, + "com.typesafe.akka" %% "akka-stream" % Versions.akka +) + +val typesafeConfig = Seq( + "com.typesafe" % "config" % Versions.typesafeConfig, + "com.github.kxbmap" %% "configs" % Versions.kxbmap +) + +val http = Seq( + "org.apache.httpcomponents" % "httpcore" % "4.4.12" % "provided" ) val json4s = Seq( @@ -13,6 +26,10 @@ val json4s = Seq( "org.json4s" %% "json4s-ext" % Versions.json4s ).map(_.excludeAll(jacksonExclusions *)) -libraryDependencies ++= configDependencies ++ -json4s :+ "com.google.code.gson" % "gson" % Versions.gson :+ -("app.softnetwork.persistence" %% "persistence-core" % Versions.genericPersistence excludeAll (jacksonExclusions 
*)) +val mockito = Seq( + "org.mockito" %% "mockito-scala" % "1.17.12" % Test +) + +libraryDependencies ++= akka ++ typesafeConfig ++ http ++ +json4s ++ mockito :+ "com.google.code.gson" % "gson" % Versions.gson :+ + "com.typesafe.scala-logging" %% "scala-logging" % Versions.scalaLogging diff --git a/core/src/main/resources/softnetwork-elastic.conf b/core/src/main/resources/softnetwork-elastic.conf index d884e512..25f1644b 100644 --- a/core/src/main/resources/softnetwork-elastic.conf +++ b/core/src/main/resources/softnetwork-elastic.conf @@ -1,11 +1,14 @@ elastic { - ip = "localhost" - ip = ${?ELASTIC_IP} + # Connection settings + host = "localhost" + host = ${?ELASTIC_HOST} + host = ${?ELASTIC_IP} # Alternative environment variable for host IP for backward compatibility port = 9200 port = ${?ELASTIC_PORT} + # Authentication credentials { - url = "http://"${elastic.ip}":"${elastic.port} + url = "http://"${elastic.host}":"${elastic.port} username = "" password = "" @@ -15,7 +18,25 @@ elastic { } + # Performance multithreaded = true - discovery-enabled = false + connection-timeout = 5s + socket-timeout = 30s + # Cluster discovery + discovery { + enabled = false + frequency = 5m + } + + # Metrics and Monitoring + metrics { + enabled = true + monitoring { + enabled = true + interval = 30s + failure-rate-threshold = 10.0 # Alert if > 10% failures + latency-threshold = 1000.0 # Alert if average latency > 1000ms + } + } } \ No newline at end of file diff --git a/core/src/main/scala-2.12/app/softnetwork/elastic/client/ElasticConfig.scala b/core/src/main/scala-2.12/app/softnetwork/elastic/client/ElasticConfig.scala index 837acd84..d07582f6 100644 --- a/core/src/main/scala-2.12/app/softnetwork/elastic/client/ElasticConfig.scala +++ b/core/src/main/scala-2.12/app/softnetwork/elastic/client/ElasticConfig.scala @@ -1,13 +1,49 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package app.softnetwork.elastic.client import com.typesafe.config.{Config, ConfigFactory} import com.typesafe.scalalogging.StrictLogging import configs.Configs +import java.time.Duration + +/** Complete Elasticsearch client configuration. 
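+  * Typically loaded from the `elastic` block of `softnetwork-elastic.conf` shown earlier in this diff.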
+ * + * @param credentials + * Connection credentials (url, username, password) + * @param multithreaded + * Enables multi-threaded mode for parallel operations + * @param discovery + * Automatic cluster node discovery configuration + * @param connectionTimeout + * Connection timeout to the cluster + * @param socketTimeout + * Socket operation timeout + * @param metrics + * Metrics and monitoring configuration + */ case class ElasticConfig( credentials: ElasticCredentials = ElasticCredentials(), multithreaded: Boolean = true, - discoveryEnabled: Boolean = false + discovery: DiscoveryConfig, + connectionTimeout: Duration, + socketTimeout: Duration, + metrics: MetricsConfig ) object ElasticConfig extends StrictLogging { diff --git a/core/src/main/scala-2.13/app/softnetwork/elastic/client/ElasticConfig.scala b/core/src/main/scala-2.13/app/softnetwork/elastic/client/ElasticConfig.scala index 93c325d9..ddc77f82 100644 --- a/core/src/main/scala-2.13/app/softnetwork/elastic/client/ElasticConfig.scala +++ b/core/src/main/scala-2.13/app/softnetwork/elastic/client/ElasticConfig.scala @@ -16,14 +16,35 @@ package app.softnetwork.elastic.client +import app.softnetwork.elastic.client.metrics.MetricsConfig import com.typesafe.config.{Config, ConfigFactory} import com.typesafe.scalalogging.StrictLogging import configs.ConfigReader +import java.time.Duration + +/** Complete Elasticsearch client configuration. + * + * @param credentials + * Connection credentials (url, username, password) + * @param multithreaded + * Enables multi-threaded mode for parallel operations + * @param discovery + * Automatic cluster node discovery configuration + * @param connectionTimeout + * Connection timeout to the cluster + * @param socketTimeout + * Socket operation timeout + * @param metrics + * Metrics and monitoring configuration + */ case class ElasticConfig( credentials: ElasticCredentials = ElasticCredentials(), multithreaded: Boolean = true, - discoveryEnabled: Boolean = false + discovery: DiscoveryConfig, + connectionTimeout: Duration, + socketTimeout: Duration, + metrics: MetricsConfig ) object ElasticConfig extends StrictLogging { diff --git a/core/src/main/scala/app/softnetwork/elastic/client/AggregateApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/AggregateApi.scala new file mode 100644 index 00000000..fb4dddb6 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/AggregateApi.scala @@ -0,0 +1,199 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticResult, ElasticSuccess} +import app.softnetwork.elastic.sql.query.SQLQuery + +import java.time.temporal.Temporal +import scala.annotation.tailrec +import scala.concurrent.{ExecutionContext, Future} + +/** Aggregate API for Elasticsearch clients. 
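+  * The single-value implementation below runs the SQL query through `search` and coerces the response aggregations into typed values.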
+ * + * @tparam T + * - the type of aggregate result + */ +trait AggregateApi[T <: AggregateResult] { + + /** Aggregate the results of the given SQL query. + * + * @param sqlQuery + * - the query to aggregate the results for + * @return + * a sequence of aggregated results + */ + def aggregate(sqlQuery: SQLQuery)(implicit + ec: ExecutionContext + ): Future[ElasticResult[_root_.scala.collection.Seq[T]]] +} + +/** Aggregate API for single value aggregate results. + */ +trait SingleValueAggregateApi + extends AggregateApi[SingleValueAggregateResult] + with ElasticConversion { + _: SearchApi => + + /** Aggregate the results of the given SQL query. + * + * @param sqlQuery + * - the query to aggregate the results for + * @return + * a sequence of aggregated results + */ + override def aggregate( + sqlQuery: SQLQuery + )(implicit + ec: ExecutionContext + ): Future[ElasticResult[_root_.scala.collection.Seq[SingleValueAggregateResult]]] = { + Future { + @tailrec + def findAggregation( + name: String, + results: Map[String, Any] + ): Option[Any] = { + name.split("\\.") match { + case Array(_, tail @ _*) if tail.nonEmpty => + findAggregation( + tail.mkString("."), + results + ) + case _ => results.get(name) + } + } + + @tailrec + def getAggregateValue(s: Seq[_], distinct: Boolean): AggregateValue = { + if (s.isEmpty) return EmptyValue + + s.headOption match { + case Some(_: Boolean) => + val values = s.asInstanceOf[Seq[Boolean]] + ArrayOfBooleanValue(if (distinct) values.distinct else values) + + case Some(_: Number) => + val values = s.asInstanceOf[Seq[Number]] + ArrayOfNumericValue(if (distinct) values.distinct else values) + + case Some(_: Temporal) => + val values = s.asInstanceOf[Seq[Temporal]] + ArrayOfTemporalValue(if (distinct) values.distinct else values) + + case Some(_: String) => + val values = s.map(_.toString) + ArrayOfStringValue(if (distinct) values.distinct else values) + + case Some(_: Map[_, _]) => + val typedMaps = s.asInstanceOf[Seq[Map[String, Any]]] + val metadataKeys = Set("_id", "_index", "_score", "_sort") + + // Check if all maps have the same single non-metadata key + val nonMetadataKeys = typedMaps.flatMap(_.keys.filterNot(metadataKeys.contains)) + val uniqueKeys = nonMetadataKeys.distinct + + if (uniqueKeys.size == 1) { + // Extract values from the single key + val key = uniqueKeys.head + val extractedValues = typedMaps.flatMap(_.get(key)) + getAggregateValue(extractedValues, distinct) + } else { + // Multiple keys: return as object array, dropping ES metadata entries + // (filterNot on a Map iterates (key, value) pairs, so match on the key itself) + val cleanMaps = typedMaps.map(m => + m.filterNot { case (key, _) => metadataKeys.contains(key) } + ) + ArrayOfObjectValue(if (distinct) cleanMaps.distinct else cleanMaps) + } + + case Some(_: Seq[_]) => + // Handle nested sequences (flatten them) + getAggregateValue(s.asInstanceOf[Seq[Seq[_]]].flatten, distinct) + + case _ => EmptyValue + } + } + // Execute the search + search(sqlQuery) + .flatMap { response => + // Parse the response + val parseResult = ElasticResult.fromTry(parseResponse(response)) + + parseResult match { + // Case 1: Parse successful - process the results + case ElasticSuccess(results) => + val aggregationResults = results.flatMap { result => + response.aggregations.map { case (name, aggregation) => + // Attempt to process each aggregation + val aggregationResult = ElasticResult.attempt { + val value = findAggregation(name, result).orNull match { + case b: Boolean => BooleanValue(b) + case n: Number => NumericValue(n) + case s: String => StringValue(s) + case t: Temporal => TemporalValue(t) + 
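// Composite payloads: objects are unwrapped into ObjectValue just below, + // while multivalued buckets are coerced through getAggregateValue +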
case m: Map[_, Any] => ObjectValue(m.map(kv => kv._1.toString -> kv._2)) + case s: Seq[_] if aggregation.multivalued => + getAggregateValue(s, aggregation.distinct) + case _ => EmptyValue + } + + SingleValueAggregateResult(name, aggregation.aggType, value) + } + + // Convert failures to results with errors + aggregationResult match { + case ElasticSuccess(result) => result + case ElasticFailure(error) => + SingleValueAggregateResult( + name, + aggregation.aggType, + EmptyValue, + error = Some(s"Failed to process aggregation: ${error.message}") + ) + } + }.toSeq + } + + ElasticResult.success(aggregationResults) + + // Case 2: Parse failed - returning empty results with errors + case ElasticFailure(error) => + val errorResults = response.aggregations.map { case (name, aggregation) => + SingleValueAggregateResult( + name, + aggregation.aggType, + EmptyValue, + error = Some(s"Parse error: ${error.message}") + ) + }.toSeq + + ElasticResult.success(errorResults) + } + } + .fold( + // If search() fails, throw an exception + onFailure = error => { + throw new IllegalArgumentException( + s"Failed to execute search for SQL query: ${sqlQuery.query}", + error.cause.orNull + ) + }, + onSuccess = results => ElasticResult.success(results) + ) + } + } +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/AggregateResult.scala b/core/src/main/scala/app/softnetwork/elastic/client/AggregateResult.scala index 4b6468cf..81a52b7d 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/AggregateResult.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/AggregateResult.scala @@ -16,7 +16,8 @@ package app.softnetwork.elastic.client -import app.softnetwork.elastic.sql.function.aggregate.AggregateFunction +import java.time.temporal.Temporal +import scala.util.{Failure, Success, Try} sealed trait AggregateResult { def field: String @@ -24,36 +25,121 @@ sealed trait AggregateResult { } sealed trait MetricAgregateResult extends AggregateResult { - def function: AggregateFunction + def aggType: AggregationType.AggregationType } sealed trait AggregateValue -case class NumericValue(value: Double) extends AggregateValue +case class BooleanValue(value: Boolean) extends AggregateValue +case class NumericValue(value: Number) extends AggregateValue case class StringValue(value: String) extends AggregateValue +case class TemporalValue(value: Temporal) extends AggregateValue +case class ObjectValue(value: Map[String, Any]) extends AggregateValue + +sealed trait ArrayAggregateValue[T] extends AggregateValue { + def value: Seq[T] +} +case class ArrayOfBooleanValue(value: Seq[Boolean]) extends ArrayAggregateValue[Boolean] +case class ArrayOfNumericValue(value: Seq[Number]) extends ArrayAggregateValue[Number] +case class ArrayOfStringValue(value: Seq[String]) extends ArrayAggregateValue[String] +case class ArrayOfTemporalValue(value: Seq[Temporal]) extends ArrayAggregateValue[Temporal] +case class ArrayOfObjectValue(value: Seq[Map[String, Any]]) + extends ArrayAggregateValue[Map[String, Any]] + case object EmptyValue extends AggregateValue case class SingleValueAggregateResult( field: String, - function: AggregateFunction, + aggType: AggregationType.AggregationType, value: AggregateValue, error: Option[String] = None ) extends MetricAgregateResult { - def asDoubleOption: Option[Double] = value match { - case NumericValue(v) => Some(v) - case _ => None + + def isEmpty: Boolean = value == EmptyValue + + def hasError: Boolean = error.isDefined + + def getOrElse[T](default: T)(extractor: AggregateValue => 
Option[T]): T = + extractor(value).getOrElse(default) + + def fold[T]( + onBoolean: Boolean => T, + onNumeric: Number => T, + onString: String => T, + onTemporal: Temporal => T, + onObject: Map[String, Any] => T, + onMulti: Seq[Any] => T, + onEmpty: => T + ): T = value match { + case BooleanValue(v) => onBoolean(v) + case NumericValue(v) => onNumeric(v) + case StringValue(v) => onString(v) + case TemporalValue(v) => onTemporal(v) + case ObjectValue(v) => onObject(v) + case m: ArrayAggregateValue[_] => onMulti(m.value) + case EmptyValue => onEmpty + } + + def asBooleanSafe: Try[Boolean] = value match { + case BooleanValue(v) => Success(v) + case _ => Failure(new ClassCastException(s"Cannot cast $value to Boolean")) } - def asStringOption: Option[String] = value match { - case StringValue(v) => Some(v) - case _ => None + + def asNumericSafe: Try[Number] = value match { + case NumericValue(v) => Success(v) + case _ => Failure(new ClassCastException(s"Cannot cast $value to Double")) + } + + def asDoubleSafe: Try[Double] = asNumericSafe.map(_.doubleValue()) + + def asIntSafe: Try[Int] = asNumericSafe.map(_.intValue()) + + def asLongSafe: Try[Long] = asNumericSafe.map(_.longValue()) + + def asByteSafe: Try[Byte] = asNumericSafe.map(_.byteValue()) + + def asShortSafe: Try[Short] = asNumericSafe.map(_.shortValue()) + + def asStringSafe: Try[String] = value match { + case StringValue(v) => Success(v) + case _ => Failure(new ClassCastException(s"Cannot cast $value to String")) + } + + def asTemporalSafe: Try[Temporal] = value match { + case TemporalValue(v) => Success(v) + case _ => Failure(new ClassCastException(s"Cannot cast $value to Temporal")) + } + + def asMapSafe: Try[Map[String, Any]] = value match { + case ObjectValue(v) => Success(v) + case _ => Failure(new ClassCastException(s"Cannot cast $value to Map")) + } + + def asSeqSafe: Try[Seq[Any]] = value match { + case ArrayOfBooleanValue(v) => Success(v) + case ArrayOfNumericValue(v) => Success(v) + case ArrayOfStringValue(v) => Success(v) + case ArrayOfTemporalValue(v) => Success(v) + case ArrayOfObjectValue(v) => Success(v) + case _ => Failure(new ClassCastException(s"Cannot cast $value to Seq")) } - def isDouble: Boolean = value match { - case NumericValue(_) => true - case _ => false + // Pretty print for debugging + def prettyPrint: String = { + val errorMsg = error.map(e => s" [ERROR: $e]").getOrElse("") + s"$aggType($field) = ${formatValue(value)}$errorMsg" } - def isString: Boolean = value match { - case StringValue(_) => true - case _ => false + private def formatValue(v: AggregateValue): String = v match { + case BooleanValue(b) => b.toString + case NumericValue(n) => n.toString + case StringValue(s) => s""""$s"""" + case TemporalValue(t) => t.toString + case ObjectValue(m) => m.toString + case ArrayOfBooleanValue(s) => s.mkString("[", ", ", "]") + case ArrayOfNumericValue(s) => s.mkString("[", ", ", "]") + case ArrayOfStringValue(s) => s.map(str => s""""$str"""").mkString("[", ", ", "]") + case ArrayOfTemporalValue(s) => s.mkString("[", ", ", "]") + case ArrayOfObjectValue(s) => s.mkString("[", ", ", "]") + case EmptyValue => "null" } } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/AliasApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/AliasApi.scala new file mode 100644 index 00000000..ccc76be6 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/AliasApi.scala @@ -0,0 +1,458 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.{ + ElasticError, + ElasticFailure, + ElasticResult, + ElasticSuccess +} +import com.google.gson.JsonParser + +import scala.jdk.CollectionConverters._ +import scala.util.Try + +/** Alias management API. + * + * This implementation provides: + * - Adding/removing aliases + * - Existence checking + * - Retrieving aliases from an index + * - Atomic operations (swap) + * - Full parameter validation + * + * ==Elasticsearch rules for aliases== + * + * Aliases follow the same naming rules as indexes: + * - Lowercase only + * - No special characters: \, /, *, ?, ", <, >, |, space, comma, # + * - Does not start with -, _, + + * - Is not . or .. + * - Maximum 255 characters + */ +trait AliasApi extends ElasticClientHelpers { _: IndicesApi => + + // ======================================================================== + // PUBLIC METHODS + // ======================================================================== + + //format:off + /** Add an alias to an index. + * + * This operation: + * 1. Validates the index and alias names 2. Checks that the index exists 3. Adds the alias + * + * @param index + * the index name + * @param alias + * the alias name to add + * @return + * ElasticSuccess(true) if added, ElasticFailure otherwise + * + * @example + * {{{ + * addAlias("my-index-2024", "my-index-current") match { + * case ElasticSuccess(_) => println("Alias added") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * }}} + * + * @note + * An alias can point to multiple indexes (useful for searches) + * @note + * An index can have multiple aliases + */ + //format:on + def addAlias(index: String, alias: String): ElasticResult[Boolean] = { + // Validation... 
+ validateIndexName(index) match { + case Some(error) => + return ElasticFailure( + error.copy( + message = s"Invalid index: ${error.message}", + statusCode = Some(400), + operation = Some("addAlias") + ) + ) + case None => // OK + } + + validateAliasName(alias) match { + case Some(error) => + return ElasticFailure( + error.copy( + message = s"Invalid alias: ${error.message}", + statusCode = Some(400), + operation = Some("addAlias") + ) + ) + case None => // OK + } + + if (index == alias) { + return ElasticFailure( + ElasticError( + message = s"Index and alias cannot have the same name: '$index'", + cause = None, + statusCode = Some(400), + operation = Some("addAlias") + ) + ) + } + + indexExists(index) match { + case ElasticSuccess(false) => + return ElasticFailure( + ElasticError( + message = s"Index '$index' does not exist", + cause = None, + statusCode = Some(404), + operation = Some("addAlias") + ) + ) + case ElasticFailure(error) => return ElasticFailure(error) + case _ => // OK + } + + logger.debug(s"Adding alias '$alias' to index '$index'") + + executeAddAlias(index, alias) match { + case success @ ElasticSuccess(_) => + logger.info(s"✅ Alias '$alias' successfully added to index '$index'") + success + + case failure @ ElasticFailure(error) => + logger.error(s"❌ Failed to add alias '$alias' to index '$index': ${error.message}") + failure + } + } + + //format:off + /** Remove an alias from an index. + * + * @param index + * the name of the index + * @param alias + * the name of the alias to remove + * @return + * ElasticSuccess(true) if removed, ElasticFailure otherwise + * + * @example + * {{{ + * removeAlias("my-index-2024", "my-index-current") match { + * case ElasticSuccess(_) => println("Alias removed") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * }}} + * + * @note + * If the alias does not exist, Elasticsearch returns a 404 error + */ + //format:on + def removeAlias(index: String, alias: String): ElasticResult[Boolean] = { + // Validation... + validateIndexName(index) match { + case Some(error) => + return ElasticFailure( + error.copy( + operation = Some("removeAlias"), + statusCode = Some(400), + message = s"Invalid index: ${error.message}" + ) + ) + case None => // OK + } + + validateAliasName(alias) match { + case Some(error) => + return ElasticFailure( + error.copy( + operation = Some("removeAlias"), + statusCode = Some(400), + message = s"Invalid alias: ${error.message}" + ) + ) + case None => // OK + } + + logger.debug(s"Removing alias '$alias' from index '$index'") + + executeRemoveAlias(index, alias) match { + case success @ ElasticSuccess(_) => + logger.info(s"✅ Alias '$alias' successfully removed from index '$index'") + success + + case failure @ ElasticFailure(error) => + logger.error(s"❌ Failed to remove alias '$alias' from index '$index': ${error.message}") + failure + } + + } + + //format:off + /** Check if an alias exists. 
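+    * The alias name is validated locally before the existence check is issued.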
+ * + * @param alias + * the name of the alias to check + * @return + * ElasticSuccess(true) if it exists, ElasticSuccess(false) otherwise, ElasticFailure in case + * of error + * + * @example + * {{{ + * aliasExists("my-alias") match { + * case ElasticSuccess(true) => println("Alias exists") + * case ElasticSuccess(false) => println("Alias does not exist") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * }}} + */ + //format:on + def aliasExists(alias: String): ElasticResult[Boolean] = { + + validateAliasName(alias) match { + case Some(error) => + return ElasticFailure( + error.copy( + message = s"Invalid alias name: ${error.message}", + statusCode = Some(400), + operation = Some("aliasExists") + ) + ) + case None => // OK + } + + logger.debug(s"Checking if alias '$alias' exists") + + executeAliasExists(alias) match { + case success @ ElasticSuccess(exists) => + if (exists) { + logger.info(s"✅ Alias '$alias' exists") + } else { + logger.info(s"✅ Alias '$alias' does not exist") + } + success + + case failure @ ElasticFailure(error) => + logger.error(s"❌ Failed to check existence of alias '$alias': ${error.message}") + failure + } + } + + //format:off + /** Retrieve all aliases from an index. + * + * @param index + * the index name + * @return + * ElasticResult with the list of aliases + * + * @example + * {{{ + * getAliases("my-index") match { + * case ElasticSuccess(aliases) => println(s"Aliases: ${aliases.mkString(", ")}") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * + * }}} + */ + //format:on + def getAliases(index: String): ElasticResult[Set[String]] = { + + validateIndexName(index) match { + case Some(error) => + return ElasticFailure( + error.copy( + message = s"Invalid index name: ${error.message}", + statusCode = Some(400), + operation = Some("getAliases") + ) + ) + case None => // OK + } + + logger.debug(s"Getting aliases for index '$index'") + + executeGetAliases(index).flatMap { jsonString => + // ✅ Extracting aliases from JSON + ElasticResult.fromTry( + Try { + new JsonParser().parse(jsonString).getAsJsonObject + } + ) match { + case ElasticFailure(error) => + logger.error(s"❌ Failed to parse aliases JSON for index '$index': ${error.message}") + return ElasticFailure(error) + case ElasticSuccess(rootObj) => + if (!rootObj.has(index)) { + logger.warn(s"Index '$index' not found in response") + return ElasticResult.success(Set.empty[String]) + } + + val indexObj = rootObj.getAsJsonObject(index) + if (indexObj == null) { + logger.warn(s"Index '$index' is null in response") + return ElasticResult.success(Set.empty[String]) + } + + val aliasesObj = indexObj.getAsJsonObject("aliases") + if (aliasesObj == null || aliasesObj.size() == 0) { + logger.debug(s"No aliases found for index '$index'") + ElasticResult.success(Set.empty[String]) + } else { + val aliases = aliasesObj.entrySet().asScala.map(_.getKey).toSet + logger.debug( + s"Found ${aliases.size} alias(es) for index '$index': ${aliases.mkString(", ")}" + ) + ElasticResult.success(aliases) + } + } + } match { + case success @ ElasticSuccess(aliases) => + if (aliases.nonEmpty) + logger.info( + s"✅ Found ${aliases.size} alias(es) for index '$index': ${aliases.mkString(", ")}" + ) + else + logger.info(s"✅ No aliases found for index '$index'") + success + + case failure @ ElasticFailure(error) => + logger.error(s"❌ Failed to get aliases for index '$index': ${error.message}") + failure + } + } + + //format:off + /** Atomic swap of an alias between two indexes. 
+ * + * This operation is atomic: the alias is removed from oldIndex and added to newIndex in a single + * query, thus avoiding any period when the alias does not exist. This is the recommended + * operation for zero-downtime deployments. + * + * @param oldIndex + * the current index pointed to by the alias + * @param newIndex + * the new index that should point to the alias + * @param alias + * the name of the alias to swap + * @return + * ElasticSuccess(true) if swapped, ElasticFailure otherwise + * + * @example + * {{{ + * // Zero-downtime deployment + * swapAlias(oldIndex = "products-v1", newIndex = "products-v2", alias = "products") match { + * case ElasticSuccess(_) => println("✅ Alias swapped, new version deployed") + * case ElasticFailure(error) => println(s"❌ Error: ${error.message}") + * } + * }}} + * + * @note + * This operation is atomic and therefore preferable to removeAlias + addAlias + */ + //format:on + def swapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): ElasticResult[Boolean] = { + + // ✅ Validation... + validateIndexName(oldIndex) match { + case Some(error) => + return ElasticFailure( + error.copy( + operation = Some("swapAlias"), + statusCode = Some(400), + message = s"Invalid old index name: ${error.message}" + ) + ) + case None => // OK + } + + validateIndexName(newIndex) match { + case Some(error) => + return ElasticFailure( + error.copy( + operation = Some("swapAlias"), + statusCode = Some(400), + message = s"Invalid new index name: ${error.message}" + ) + ) + case None => // OK + } + + validateAliasName(alias) match { + case Some(error) => + return ElasticFailure( + error.copy( + operation = Some("swapAlias"), + statusCode = Some(400), + message = s"Invalid alias name: ${error.message}" + ) + ) + case None => // OK + } + + if (oldIndex == newIndex) { + return ElasticFailure( + ElasticError( + message = s"Old and new index cannot be the same: '$oldIndex'", + cause = None, + statusCode = Some(400), + operation = Some("swapAlias") + ) + ) + } + + logger.info(s"Swapping alias '$alias' from '$oldIndex' to '$newIndex' (atomic operation)") + + // ✅ Atomic operation : remove + add in a single request + executeSwapAlias(oldIndex, newIndex, alias) match { + case success @ ElasticSuccess(_) => + logger.info(s"✅ Alias '$alias' successfully swapped from '$oldIndex' to '$newIndex'") + success + + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to swap alias '$alias' from '$oldIndex' to '$newIndex': ${error.message}" + ) + failure + } + } + + // ======================================================================== + // METHODS TO IMPLEMENT + // ======================================================================== + + private[client] def executeAddAlias(index: String, alias: String): ElasticResult[Boolean] + + private[client] def executeRemoveAlias(index: String, alias: String): ElasticResult[Boolean] + + private[client] def executeAliasExists(alias: String): ElasticResult[Boolean] + + private[client] def executeGetAliases(index: String): ElasticResult[String] + + private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): ElasticResult[Boolean] + +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/BulkApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/BulkApi.scala new file mode 100644 index 00000000..6c8bc6d8 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/BulkApi.scala @@ -0,0 +1,522 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import akka.NotUsed +import akka.actor.ActorSystem +import akka.stream.{FlowShape, Materializer} +import akka.stream.scaladsl.{Balance, Flow, GraphDSL, Merge, Sink, Source} +import app.softnetwork.elastic.client.bulk._ +import app.softnetwork.elastic.client.result.{ElasticResult, ElasticSuccess} +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse + +import java.time.LocalDate +import java.time.format.DateTimeFormatter +import scala.concurrent.{ExecutionContext, Future} + +/** Bulk API for Elasticsearch clients. + */ +trait BulkApi extends BulkTypes with ElasticClientHelpers { + self: RefreshApi with SettingsApi with IndexApi => + + type A = BulkActionType + type R = BulkResultType + + // ======================================================================== + // PUBLIC METHODS + // ======================================================================== + + /** Bulk with detailed results (successes + failures). + * + * This method provides: + * + * - List of successfully indexed documents + * - List of failed documents with error details + * - Performance metrics + * - Configurable automatic retry + * + * @param items + * : documents to index + * @param toDocument + * : JSON transformation function + * @param indexKey + * : key for the index field + * @param idKey + * : key for the id field + * @param suffixDateKey + * : key for the date field to suffix the index + * @param suffixDatePattern + * : date pattern for the suffix + * @param update + * : true for upsert, false for index + * @param delete + * : true for delete + * @param parentIdKey + * : key for the parent field + * @param callbacks + * : callbacks for events + * @param bulkOptions + * : configuration options + * @return + * Future with detailed results + */ + def bulkWithResult[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String] = None, + idKey: Option[String] = None, + suffixDateKey: Option[String] = None, + suffixDatePattern: Option[String] = None, + update: Option[Boolean] = None, + delete: Option[Boolean] = None, + parentIdKey: Option[String] = None, + callbacks: BulkCallbacks = BulkCallbacks.default + )(implicit bulkOptions: BulkOptions, system: ActorSystem): Future[BulkResult] = { + + implicit val materializer: Materializer = Materializer(system) + implicit val ec: ExecutionContext = system.dispatcher + + val startTime = System.currentTimeMillis() + var metrics = BulkMetrics(startTime = startTime) + + bulkSource( + items, + toDocument, + indexKey, + idKey, + suffixDateKey, + suffixDatePattern, + update, + delete, + parentIdKey + ) + .runWith(Sink.fold((Set.empty[String], Seq.empty[FailedDocument], Set.empty[String])) { + case ((successIds, failedDocs, indices), Right(successfulDoc)) => + callbacks.onSuccess(successfulDoc.id, successfulDoc.index) + (successIds + successfulDoc.id, failedDocs, indices + successfulDoc.index) + + case ((successIds, failedDocs, indices), Left(failed)) 
=> + callbacks.onFailure(failed) + metrics = metrics.addFailure(failed.error) + (successIds, failedDocs :+ failed, indices + failed.index) + }) + .map { case (successIds, failedDocs, indices) => + metrics = metrics.copy( + endTime = Some(System.currentTimeMillis()), + totalDocuments = successIds.size + failedDocs.size + ) + + val result = BulkResult( + successCount = successIds.size, + successIds = successIds, + failedCount = failedDocs.size, + failedDocuments = failedDocs, + indices = indices, + metrics = metrics.complete + ) + + callbacks.onComplete(result) + + // Refresh indexes if necessary + if (!bulkOptions.disableRefresh) { + indices.foreach(refresh) + } + + result + } + } + + //format:off + /** Source: Akka Streams, which provides real-time results. + * + * Each emitted item is either: + * - Right(id) for success + * - Left(failed) for failure + * + * @example + * {{{ + * bulkSource(items, toDocument) + * .runWith(Sink.foreach { + * case Right(id) => println(s"✅ Success: $id") + * case Left(failed) => println(s"❌ Failed: ${failed.id}") + * }) + * }}} + * @param items + * the documents to index + * @param toDocument + * JSON transformation function + * @param indexKey + * key for the index field + * @param idKey + * key for the id field + * @param suffixDateKey + * date field key to suffix the index + * @param suffixDatePattern + * date pattern for the suffix + * @param update + * true for upsert, false for index + * @param delete + * true to delete + * @param parentIdKey + * parent field key + * @param bulkOptions + * configuration options + * @return + * Source outputting Right(id) or Left(failed) + */ + //format:on + def bulkSource[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String] = None, + idKey: Option[String] = None, + suffixDateKey: Option[String] = None, + suffixDatePattern: Option[String] = None, + update: Option[Boolean] = None, + delete: Option[Boolean] = None, + parentIdKey: Option[String] = None + )(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Source[Either[FailedDocument, SuccessfulDocument], NotUsed] = { + + implicit val materializer: Materializer = Materializer(system) + implicit val ec: ExecutionContext = system.dispatcher + + var metrics = BulkMetrics() + + Source + .fromIterator(() => items) + // ✅ Transformation en BulkItem + .map { item => + toBulkItem( + toDocument, + indexKey, + idKey, + suffixDateKey, + suffixDatePattern, + update, + delete, + parentIdKey, + item + ) + } + // ✅ Settings management (refresh, replicas) + .via( + BulkSettings[BulkItem](bulkOptions.disableRefresh)( + self, + toBulkAction + ) + ) + // ✅ Batch grouping + .grouped(bulkOptions.maxBulkSize) + .map { batch => + metrics = metrics.copy(totalBatches = metrics.totalBatches + 1) + if (metrics.totalBatches % bulkOptions.logEvery == 0) { + logger.info( + s"Processing batch ${metrics.totalBatches} " + + s"(${metrics.totalDocuments} docs, ${metrics.throughput} docs/sec)" + ) + } + batch + } + // ✅ Conversion to BulkActionType + .map(_.map(toBulkAction)) + // ✅ Execution of bulk with parallelism + .via(balancedBulkFlow(bulkOptions.balance)) + // ✅ Extracting results with original batch + .mapConcat { case (result, originalBatch) => + extractBulkResults(result, originalBatch) + } + // ✅ Automatic retry of failures + .mapAsync(1) { + case success @ Right(_) => + Future.successful(success) + + case failure @ Left(failed) if bulkOptions.retryOnFailure && failed.retryable => + retryWithBackoff(failed, bulkOptions.maxRetries) + .map { + case true => 
Right(SuccessfulDocument(id = failed.id, index = failed.index)) + case false => failure + } + + case failure => + Future.successful(failure) + } + } + + /** Backward compatible API (old signature). + * + * @deprecated + * Use `bulkWithResult` to get failure details + */ + def bulk[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String] = None, + idKey: Option[String] = None, + suffixDateKey: Option[String] = None, + suffixDatePattern: Option[String] = None, + update: Option[Boolean] = None, + delete: Option[Boolean] = None, + parentIdKey: Option[String] = None + )(implicit bulkOptions: BulkOptions, system: ActorSystem): ElasticResult[BulkResult] = { + import scala.concurrent.Await + import scala.concurrent.duration._ + + val result = Await.result( + bulkWithResult( + items, + toDocument, + indexKey, + idKey, + suffixDateKey, + suffixDatePattern, + update, + delete, + parentIdKey + ), + Duration.Inf + ) + + // Refresh indexes if necessary + if (!bulkOptions.disableRefresh) { + result.indices.foreach(refresh) + } + + ElasticResult.success(result) + } + + // ======================================================================== + // METHODS TO IMPLEMENT + // ======================================================================== + + implicit private[client] def toBulkElasticAction(a: BulkActionType): BulkElasticAction + + /** Basic flow for executing a bulk action. This method must be implemented by concrete classes + * depending on the Elasticsearch version and client used. + * + * @param bulkOptions + * configuration options + * @return + * Flow transforming bulk actions into results + */ + private[client] def bulkFlow(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Flow[Seq[BulkActionType], BulkResultType, NotUsed] + + /** Convert a BulkResultType into individual results. This method must extract the successes and + * failures from the ES response. 
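+    * Implementations are expected to correlate each response item with its originating `BulkItem` from `originalBatch`.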
+ * + * @param result + * raw result from the bulk + * @return + * sequence of Right(id) for success or Left(failed) for failure + */ + private[client] def extractBulkResults( + result: BulkResultType, + originalBatch: Seq[BulkItem] + ): Seq[Either[FailedDocument, SuccessfulDocument]] + + /** Conversion BulkItem -> BulkActionType */ + private[client] def toBulkAction(bulkItem: BulkItem): BulkActionType + + /** Conversion BulkActionType -> BulkItem */ + private[client] def actionToBulkItem(action: BulkActionType): BulkItem + + // ======================================================================== + // PRIVATE HELPERS + // ======================================================================== + + /** Flow with balance for parallelism */ + private def balancedBulkFlow( + parallelism: Int + )(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Flow[Seq[BulkActionType], (BulkResultType, Seq[BulkItem]), NotUsed] = { + + implicit val ec: ExecutionContext = system.dispatcher + + if (parallelism > 1) { + Flow.fromGraph(GraphDSL.create() { implicit b => + import GraphDSL.Implicits._ + + val balancer = b.add(Balance[Seq[BulkActionType]](parallelism)) + val merge = b.add(Merge[(BulkResultType, Seq[BulkItem])](parallelism)) + + // ✅ Keep the original batch for extracting the results + val bulkFlowWithOriginal = Flow[Seq[BulkActionType]] + .map(batch => (batch, batch.map(actionToBulkItem))) + .mapAsync(1) { case (actions, originalBatch) => + // Run the bulk via bulkFlow + Source + .single(actions) + .via(bulkFlow) + .runWith(Sink.head) + .map(result => (result, originalBatch)) + } + + balancer ~> bulkFlowWithOriginal ~> merge + + 1 until parallelism foreach { _ => + balancer ~> bulkFlowWithOriginal ~> merge + } + + FlowShape(balancer.in, merge.out) + }) + } else { + Flow[Seq[BulkActionType]] + .map(batch => (batch, batch.map(actionToBulkItem))) + .mapAsync(1) { case (actions, originalBatch) => + Source + .single(actions) + .via(bulkFlow) + .runWith(Sink.head) + .map(result => (result, originalBatch)) + } + } + } + + /** Retry with exponential backoff */ + private def retryWithBackoff( + failed: FailedDocument, + maxRetries: Int, + currentRetry: Int = 0 + )(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Future[Boolean] = { + + implicit val ec: ExecutionContext = system.dispatcher + + if (currentRetry >= maxRetries) { + logger.warn(s"Max retries ($maxRetries) reached for document ${failed.id}") + return Future.successful(false) + } + + val delay = bulkOptions.retryDelay * Math + .pow( + bulkOptions.retryBackoffMultiplier, + currentRetry + ) + .toLong + + logger.info( + s"Retrying document ${failed.id} (attempt ${currentRetry + 1}/$maxRetries) " + + s"after ${delay.toMillis}ms" + ) + + akka.pattern.after(delay, system.scheduler) { + executeSingleDocument(failed) + .flatMap { + case true => + logger.info(s"✅ Successfully retried document ${failed.id}") + Future.successful(true) + + case false => + logger.warn(s"❌ Retry failed for document ${failed.id}") + retryWithBackoff(failed, maxRetries, currentRetry + 1) + } + .recoverWith { case ex: Throwable => + logger.error(s"Exception during retry of ${failed.id}: ${ex.getMessage}") + retryWithBackoff(failed, maxRetries, currentRetry + 1) + } + } + } + + /** Execute a single document (for retry). 
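+    * The retry goes through `indexAsync`, so the document is re-indexed individually rather than replayed through the bulk flow.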
+ * + * @param failed + * failed document to retry + * @return + * Future[Boolean] indicating success + */ + private def executeSingleDocument( + failed: FailedDocument + )(implicit system: ActorSystem): Future[Boolean] = { + implicit val ec: ExecutionContext = system.dispatcher + indexAsync(failed.index, failed.document, failed.id).flatMap { + case ElasticSuccess(true) => + Future.successful(true) + + case _ => + Future.successful(false) + } + } + + private def toBulkItem[D]( + toDocument: D => String, + indexKey: Option[String], + idKey: Option[String], + suffixDateKey: Option[String], + suffixDatePattern: Option[String], + update: Option[Boolean], + delete: Option[Boolean], + parentIdKey: Option[String], + item: D + )(implicit bulkOptions: BulkOptions): BulkItem = { + implicit val formats: DefaultFormats = org.json4s.DefaultFormats + val document = toDocument(item) + val jsonMap = parse(document, useBigDecimalForDouble = false).extract[Map[String, Any]] + + // extract id + val id = idKey.flatMap { i => + jsonMap.get(i).map(_.toString) + } + + // extract final index name + val indexFromKeyOrOptions = indexKey + .flatMap { i => + jsonMap.get(i).map(_.toString) + } + .getOrElse(bulkOptions.defaultIndex) + val index = suffixDateKey + .flatMap { s => + jsonMap.get(s).map { d => + val strDate = d.toString.substring(0, 10) + val date = LocalDate.parse(strDate, DateTimeFormatter.ofPattern("yyyy-MM-dd")) + date.format( + suffixDatePattern + .map(DateTimeFormatter.ofPattern) + .getOrElse(DateTimeFormatter.ofPattern("yyyy-MM-dd")) + ) + } + } + .map(s => s"$indexFromKeyOrOptions-$s") + .getOrElse(indexFromKeyOrOptions) + + // extract parent key + val parent = parentIdKey.flatMap { i => + jsonMap.get(i).map(_.toString) + } + + val action = delete match { + case Some(d) if d => BulkAction.DELETE + case _ => + update match { + case Some(u) if u => BulkAction.UPDATE + case _ => BulkAction.INDEX + } + } + + BulkItem(index, action, document, id, parent) + } + +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/CountApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/CountApi.scala new file mode 100644 index 00000000..4c36c7a6 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/CountApi.scala @@ -0,0 +1,124 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.{ + ElasticError, + ElasticFailure, + ElasticResult, + ElasticSuccess +} + +import scala.concurrent.{ExecutionContext, Future} + +trait CountApi extends ElasticClientHelpers { + + // ======================================================================== + // PUBLIC METHODS + // ======================================================================== + + /** Count the number of documents matching the given JSON query. 
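+    * Index names and the query body are validated locally before the count request is executed.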
+ * + * @param query + * - the query to count the documents for + * @return + * the number of documents matching the query, or None if the count could not be determined + */ + def count(query: ElasticQuery): ElasticResult[Option[Double]] = { + query.indices.flatMap { index => + validateIndexName(index).map(error => index -> error.message) + } match { + case errors if errors.nonEmpty => + return ElasticResult.failure( + ElasticError( + message = s"Invalid indices: ${errors.map(_._2).mkString(",")}", + statusCode = Some(400), + index = Some(errors.map(_._1).mkString(",")), + operation = Some("count") + ) + ) + case _ => // continue + } + + validateJson("count", query.query) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid query: ${error.message}", + statusCode = Some(400), + index = Some(query.indices.mkString(",")), + operation = Some("count") + ) + ) + case None => // continue + } + + logger.debug( + s"Counting documents matching query '${query.query}' in indices '${query.indices.mkString(",")}'" + ) + + executeCount(query) match { + case success @ ElasticSuccess(Some(count)) => + logger.info( + s"✅ Successfully counted $count documents matching query in indices '${query.indices.mkString(",")}'" + ) + success + case _ @ElasticSuccess(None) => + val error = + ElasticError( + message = + s"Could not determine count of documents matching query in indices '${query.indices.mkString(",")}'", + index = Some(query.indices.mkString(",")), + operation = Some("count") + ) + logger.error(s"❌ ${error.message}") + ElasticResult.failure(error) + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to count documents matching query in indices '${query.indices.mkString(",")}': ${error.message}" + ) + failure + } + } + + /** Count the number of documents matching the given JSON query asynchronously. + * + * @param query + * - the query to count the documents for + * @return + * the number of documents matching the query, or None if the count could not be determined + */ + def countAsync( + query: ElasticQuery + )(implicit ec: ExecutionContext): Future[ElasticResult[Option[Double]]] = { + Future { + this.count(query) + } + } + + // ======================================================================== + // METHODS TO IMPLEMENT + // ======================================================================== + + private[client] def executeCount( + query: ElasticQuery + ): ElasticResult[Option[Double]] + + private[client] def executeCountAsync( + query: ElasticQuery + )(implicit ec: ExecutionContext): Future[ElasticResult[Option[Double]]] +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/DeleteApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/DeleteApi.scala new file mode 100644 index 00000000..8e610038 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/DeleteApi.scala @@ -0,0 +1,140 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package app.softnetwork.elastic.client
+
+import app.softnetwork.elastic.client.result.{
+  ElasticError,
+  ElasticFailure,
+  ElasticResult,
+  ElasticSuccess
+}
+
+import scala.concurrent.{ExecutionContext, Future, Promise}
+import scala.util.{Failure, Success}
+
+/** Delete Management API
+  */
+trait DeleteApi extends ElasticClientHelpers { _: RefreshApi =>
+
+  // ========================================================================
+  // PUBLIC METHODS
+  // ========================================================================
+
+  /** Delete an entity from the given index.
+    * @param id
+    *   - the id of the entity to delete
+    * @param index
+    *   - the name of the index to delete the entity from
+    * @return
+    *   true if the entity was deleted successfully, false otherwise
+    */
+  def delete(id: String, index: String): ElasticResult[Boolean] = {
+    validateIndexName(index) match {
+      case Some(error) =>
+        return ElasticResult.failure(
+          error.copy(
+            message = s"Invalid index: ${error.message}",
+            statusCode = Some(400),
+            index = Some(index),
+            operation = Some("delete")
+          )
+        )
+      case None => // continue
+    }
+
+    logger.debug(s"Deleting document with id '$id' from index '$index'")
+
+    executeDelete(index, id) match {
+      case _ @ElasticSuccess(true) =>
+        logger.info(s"✅ Successfully deleted document with id '$id' from index '$index'")
+        this.refresh(index)
+      case success @ ElasticSuccess(_) =>
+        logger.info(s"✅ Document with id '$id' not found in index '$index'")
+        success
+      case failure @ ElasticFailure(error) =>
+        logger.error(
+          s"❌ Failed to delete document with id '$id' from index '$index': ${error.message}"
+        )
+        failure
+    }
+  }
+
+  /** Delete an entity from the given index asynchronously.
+    * @param id
+    *   - the id of the entity to delete
+    * @param index
+    *   - the name of the index to delete the entity from
+    * @return
+    *   a Future that completes with true if the entity was deleted successfully, false otherwise
+    */
+  def deleteAsync(id: String, index: String)(implicit
+    ec: ExecutionContext
+  ): Future[ElasticResult[Boolean]] = {
+    validateIndexName(index) match {
+      case Some(error) =>
+        return Future.successful(
+          ElasticResult.failure(
+            error.copy(
+              message = s"Invalid index: ${error.message}",
+              statusCode = Some(400),
+              index = Some(index),
+              operation = Some("deleteAsync")
+            )
+          )
+        )
+      case None => // continue
+    }
+
+    logger.debug(s"Asynchronously deleting document with id '$id' from index '$index'")
+
+    val promise: Promise[ElasticResult[Boolean]] = Promise()
+    executeDeleteAsync(index, id) onComplete {
+      case Success(s) =>
+        s match {
+          case _ @ElasticSuccess(true) =>
+            logger.info(s"✅ Successfully deleted document with id '$id' from index '$index'")
+            promise.success(this.refresh(index))
+          case success @ ElasticSuccess(_) =>
+            logger.info(s"✅ Document with id '$id' not found in index '$index'")
+            promise.success(success)
+          case failure @ ElasticFailure(error) =>
+            logger.error(s"❌ ${error.message}")
+            promise.success(failure)
+        }
+      case Failure(exception) =>
+        val error = ElasticError(
+          message =
+            s"Exception while deleting document with id '$id' from index '$index': ${exception.getMessage}",
+          operation = Some("deleteAsync"),
+          index = Some(index)
+        )
+        logger.error(s"❌ ${error.message}")
+        promise.success(ElasticResult.failure(error))
+    }
+    promise.future
+  }
+
+  // ========================================================================
+  // METHODS TO IMPLEMENT
+  // ========================================================================
+
+  private[client]
def executeDelete(index: String, id: String): ElasticResult[Boolean] + + private[client] def executeDeleteAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] +} diff --git a/es8/java/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticsearchClientProvider.scala b/core/src/main/scala/app/softnetwork/elastic/client/DiscoveryConfig.scala similarity index 61% rename from es8/java/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticsearchClientProvider.scala rename to core/src/main/scala/app/softnetwork/elastic/client/DiscoveryConfig.scala index 33b2bd05..159118cc 100644 --- a/es8/java/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticsearchClientProvider.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/DiscoveryConfig.scala @@ -14,15 +14,11 @@ * limitations under the License. */ -package app.softnetwork.elastic.persistence.query +package app.softnetwork.elastic.client -import app.softnetwork.elastic.client.java.ElasticsearchClientApi -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.model.Timestamped +import java.time.Duration -trait ElasticsearchClientProvider[T <: Timestamped] - extends ElasticProvider[T] - with ElasticsearchClientApi { - _: ManifestWrapper[T] => - -} +case class DiscoveryConfig( + enabled: Boolean, + frequency: Duration +) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientApi.scala index dc8206cf..53d4749d 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientApi.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientApi.scala @@ -16,27 +16,13 @@ package app.softnetwork.elastic.client -import java.time.LocalDate -import java.time.format.DateTimeFormatter -import akka.NotUsed -import akka.actor.ActorSystem -import _root_.akka.stream.{FlowShape, Materializer} -import akka.stream.scaladsl._ -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.serialization._ -import app.softnetwork.elastic.sql.query.{SQLQuery, SQLSearchRequest} -import com.google.gson.JsonParser import com.typesafe.config.{Config, ConfigFactory} -import org.json4s.{DefaultFormats, Formats} -import org.json4s.jackson.JsonMethods._ +import org.json4s.jackson +import org.json4s.jackson.Serialization import org.slf4j.Logger -import java.util.UUID -import scala.concurrent.{Await, ExecutionContext, Future} -import scala.concurrent.duration.Duration -import scala.language.{implicitConversions, postfixOps} -import scala.reflect.ClassTag -import scala.util.{Failure, Success, Try} +import java.io.Closeable +import scala.language.{implicitConversions, postfixOps, reflectiveCalls} /** Created by smanciot on 28/06/2018. */ @@ -46,1152 +32,27 @@ trait ElasticClientApi with AliasApi with MappingApi with CountApi - with SingleValueAggregateApi with SearchApi + with SingleValueAggregateApi + with ScrollApi with IndexApi with UpdateApi with GetApi with BulkApi with DeleteApi with RefreshApi - with FlushApi { - - def config: Config = ConfigFactory.load() - - final lazy val elasticConfig: ElasticConfig = ElasticConfig(config) -} - -trait IndicesApi { - - /** Default settings for indices. This is used when creating an index without providing specific - * settings. It includes ngram tokenizer and analyzer, as well as some default limits. 
- */ - val defaultSettings: String = - """ - |{ - | "index": { - | "max_ngram_diff": "20", - | "mapping" : { - | "total_fields" : { - | "limit" : "2000" - | } - | }, - | "analysis": { - | "analyzer": { - | "ngram_analyzer": { - | "tokenizer": "ngram_tokenizer", - | "filter": [ - | "lowercase", - | "asciifolding" - | ] - | }, - | "search_analyzer": { - | "type": "custom", - | "tokenizer": "standard", - | "filter": [ - | "lowercase", - | "asciifolding" - | ] - | } - | }, - | "tokenizer": { - | "ngram_tokenizer": { - | "type": "ngram", - | "min_gram": 1, - | "max_gram": 20, - | "token_chars": [ - | "letter", - | "digit" - | ] - | } - | } - | } - | } - |} - """.stripMargin - - /** Create an index with the provided name and settings. - * @param index - * - the name of the index to create - * @param settings - * - the settings to apply to the index (default is defaultSettings) - * @return - * true if the index was created successfully, false otherwise - */ - def createIndex(index: String, settings: String = defaultSettings): Boolean - - /** Delete an index with the provided name. - * @param index - * - the name of the index to delete - * @return - * true if the index was deleted successfully, false otherwise - */ - def deleteIndex(index: String): Boolean - - /** Close an index with the provided name. - * @param index - * - the name of the index to close - * @return - * true if the index was closed successfully, false otherwise - */ - def closeIndex(index: String): Boolean - - /** Open an index with the provided name. - * @param index - * - the name of the index to open - * @return - * true if the index was opened successfully, false otherwise - */ - def openIndex(index: String): Boolean - - /** Reindex from source index to target index. - * @param sourceIndex - * - the name of the source index - * @param targetIndex - * - the name of the target index - * @param refresh - * - true to refresh the target index after reindexing, false otherwise - * @return - * true if the reindexing was successful, false otherwise - */ - def reindex(sourceIndex: String, targetIndex: String, refresh: Boolean = true): Boolean - - /** Check if an index exists. - * @param index - * - the name of the index to check - * @return - * true if the index exists, false otherwise - */ - def indexExists(index: String): Boolean -} - -trait AliasApi { - - /** Add an alias to the given index. - * @param index - * - the name of the index - * @param alias - * - the name of the alias - * @return - * true if the alias was added successfully, false otherwise - */ - def addAlias(index: String, alias: String): Boolean - - /** Remove an alias from the given index. - * @param index - * - the name of the index - * @param alias - * - the name of the alias - * @return - * true if the alias was removed successfully, false otherwise - */ - def removeAlias(index: String, alias: String): Boolean -} - -trait SettingsApi { _: IndicesApi => - - /** Toggle the refresh interval of an index. - * @param index - * - the name of the index - * @param enable - * - true to enable the refresh interval, false to disable it - * @return - * true if the settings were updated successfully, false otherwise - */ - def toggleRefresh(index: String, enable: Boolean): Boolean = { - updateSettings( - index, - if (!enable) """{"index" : {"refresh_interval" : -1} }""" - else """{"index" : {"refresh_interval" : "1s"} }""" - ) - } - - /** Set the number of replicas for an index. 
- * @param index - * - the name of the index - * @param replicas - * - the number of replicas to set - * @return - * true if the settings were updated successfully, false otherwise - */ - def setReplicas(index: String, replicas: Int): Boolean = { - updateSettings(index, s"""{"index" : {"number_of_replicas" : $replicas} }""") - } - - /** Update the settings of an index. - * @param index - * - the name of the index - * @param settings - * - the settings to apply to the index (default is defaultSettings) - * @return - * true if the settings were updated successfully, false otherwise - */ - def updateSettings(index: String, settings: String = defaultSettings): Boolean - - /** Load the settings of an index. - * @param index - * - the name of the index to load the settings for - * @return - * the settings of the index as a JSON string - */ - def loadSettings(index: String): String -} - -trait MappingApi extends IndicesApi with RefreshApi { + with FlushApi + with VersionApi + with SerializationApi + with Closeable { protected def logger: Logger - /** Set the mapping of an index. - * @param index - * - the name of the index to set the mapping for - * @param mapping - * - the mapping to set on the index - * @return - * true if the mapping was set successfully, false otherwise - */ - def setMapping(index: String, mapping: String): Boolean - - /** Get the mapping of an index. - * @param index - * - the name of the index to get the mapping for - * @return - * the mapping of the index as a JSON string - */ - def getMapping(index: String): String - - /** Get the mapping properties of an index. - * @param index - * - the name of the index to get the mapping properties for - * @return - * the mapping properties of the index as a JSON string - */ - def getMappingProperties(index: String): String = { - val mapping = tryOrElse(getMapping(index), "{\"mappings\": {\"properties\": {}}}")(logger) - Try( - new JsonParser() - .parse(mapping) - .getAsJsonObject - .get("mappings") - .toString - ) match { - case Success(properties) => properties - case Failure(exception) => - logger.error(s"Failed to parse mapping properties for index $index and $mapping", exception) - "{\"properties\": {}}" // Return an empty properties object in case of failure - } - } - - /** Check if the mapping of an index is different from the provided mapping. - * @param index - * - the name of the index to check - * @param mapping - * - the mapping to compare with the current mapping of the index - * @return - * true if the mapping is different, false otherwise - */ - def shouldUpdateMapping( - index: String, - mapping: String - ): Boolean = { - MappingComparator.isMappingDifferent(this.getMappingProperties(index), mapping) - } - - /** Update the mapping of an index to a new mapping. 
- * @param index - * - the name of the index to migrate - * @param mapping - * - the new mapping to set on the index - * @param settings - * - the settings to apply to the index (default is defaultSettings) - * @return - * true if the mapping was updated successfully, false otherwise - */ - def updateMapping( - index: String, - mapping: String, - settings: String = defaultSettings - ): Boolean = { - // Check if the index exists - if (!tryOrElse(this.indexExists(index), false)(logger)) { - if (!tryOrElse(this.createIndex(index, settings), false)(logger)) { - logger.error(s"Failed to create index: $index") - return false - } - logger.info(s"Index $index created successfully.") - if (!tryOrElse(this.setMapping(index, mapping), false)(logger)) { - logger.error(s"Failed to set mapping for index: $index") - return false - } - logger.info(s"Mapping for index $index set successfully.") - true - } - // Check if the mapping needs to be updated - else if (shouldUpdateMapping(index, mapping)) { - val tempIndex = index + "_tmp_" + UUID.randomUUID() - var tempCreated = false - var originalDeleted = false - logger.info("--- Starting dynamic mapping migration ---") - logger.info("Target index: " + index) - logger.info("Temporary index: " + tempIndex) - def migrate(): Boolean = { - // Create a temporary index with the new mapping - tempCreated = tryOrElse(this.createIndex(tempIndex, settings), false)(logger) - if (tempCreated) { - logger.info(s"Temporary index $tempIndex created successfully.") - // Set the new mapping on the temporary index - if (!tryOrElse(this.setMapping(tempIndex, mapping), false)(logger)) { - logger.error(s"Failed to set mapping for temporary index: $tempIndex") - return false - } - logger.info(s"Mapping for temporary index $tempIndex set successfully.") - // Reindex from the original index to the temporary index - if (!tryOrElse(this.reindex(index, tempIndex), false)(logger)) { - logger.error( - s"Failed to reindex from original index: $index to temporary index: $tempIndex" - ) - return false - } - logger.info( - s"Reindexing from original index $index to temporary index $tempIndex completed successfully." - ) - // Delete the original index - originalDeleted = this.deleteIndex(index) - if (originalDeleted) { - logger.info(s"Original index $index deleted successfully.") - // Rename the temporary index to the original index name - if (!tryOrElse(this.createIndex(index, settings), false)(logger)) { - logger.error(s"Failed to recreate original index: $index") - return false - } - logger.info(s"Original index $index recreated successfully.") - if (!tryOrElse(this.setMapping(index, mapping), false)(logger)) { - logger.error(s"Failed to set mapping for original index: $index") - return false - } - logger.info(s"Mapping for original index $index set successfully.") - if (!tryOrElse(this.reindex(tempIndex, index), false)(logger)) { - logger.error( - s"Failed to reindex from temporary index: $tempIndex to original index: $index" - ) - return false - } - logger.info( - s"Reindexing from temporary index $tempIndex to original index $index completed successfully." 
- ) - if (!tryOrElse(this.openIndex(index), false)(logger)) { - logger.error(s"Failed to open original index: $index") - return false - } - logger.info(s"Original index $index opened successfully.") - logger.info("Dynamic mapping migration completed successfully.") - true - } else { - logger.error(s"Failed to delete original index: $index") - false - } - } else { - logger.error(s"Failed to create temporary index: $tempIndex") - false - } - } - val migration = Try(migrate()) match { - case Success(result) => result - case Failure(exception) => - logger.error("Exception during dynamic mapping migration", exception) - false - } - if (!migration) { - logger.error("Error during dynamic mapping migration") - if (originalDeleted) { - // If the original index was deleted, we need to recreate it - if (!tryOrElse(this.createIndex(index, settings), false)(logger)) { - logger.error(s"Failed to recreate original index: $index") - } else { - logger.info(s"Original index $index recreated successfully.") - // Set the original mapping back - if (!tryOrElse(this.setMapping(index, mapping), false)(logger)) { - logger.error(s"Failed to set mapping for original index: $index") - } else { - logger.info(s"Mapping for original index $index set successfully.") - if (!tryOrElse(this.reindex(tempIndex, index), false)(logger)) { - logger.error( - s"Failed to reindex from temporary index $tempIndex to original index $index" - ) - } else { - logger.info( - s"Reindexing from temporary index $tempIndex to original index $index completed successfully." - ) - if (!tryOrElse(this.refresh(index), false)(logger)) { - logger.error(s"Failed to refresh original index: $index") - } else { - logger.info(s"Original index $index refreshed successfully.") - } - } - } - } - } - } - if (tempCreated) { - // Clean up the temporary index if it was created - if (!tryOrElse(this.deleteIndex(tempIndex), false)(logger)) { - logger.error(s"Failed to delete temporary index: $tempIndex") - } else { - logger.info(s"Temporary index $tempIndex deleted successfully.") - } - } else { - logger.error(s"Temporary index $tempIndex was not created, skipping deletion.") - } - migration - } else { - false - } - } -} - -trait RefreshApi { - - /** Refresh the index to make sure all documents are indexed and searchable. - * @param index - * - the name of the index to refresh - * @return - * true if the index was refreshed successfully, false otherwise - */ - def refresh(index: String): Boolean -} - -trait FlushApi { - - /** Flush the index to make sure all operations are written to disk. - * @param index - * - the name of the index to flush - * @param force - * - true to force the flush, false otherwise - * @param wait - * - true to wait for the flush to complete, false otherwise - * @return - * true if the index was flushed successfully, false otherwise - */ - def flush(index: String, force: Boolean = true, wait: Boolean = true): Boolean -} - -trait IndexApi { _: RefreshApi => - - /** Index an entity in the given index. 
- * @param entity - * - the entity to index - * @param index - * - the name of the index to index the entity in (default is the entity type name) - * @param maybeType - * - the type of the entity (default is the entity class name in lowercase) - * @return - * true if the entity was indexed successfully, false otherwise - */ - def index[U <: Timestamped]( - entity: U, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit u: ClassTag[U], formats: Formats): Boolean = { - val indexType = maybeType.getOrElse(u.runtimeClass.getSimpleName.toLowerCase) - this.index( - index.getOrElse(indexType), - entity.uuid, - serialization.write[U](entity) - ) - } - - /** Index an entity in the given index. - * @param index - * - the name of the index to index the entity in - * @param id - * - the id of the entity to index - * @param source - * - the source of the entity to index in JSON format - * @return - * true if the entity was indexed successfully, false otherwise - */ - def index(index: String, id: String, source: String): Boolean - - /** Index an entity in the given index asynchronously. - * @param entity - * - the entity to index - * @param index - * - the name of the index to index the entity in (default is the entity type name) - * @param maybeType - * - the type of the entity (default is the entity class name in lowercase) - * @return - * a Future that completes with true if the entity was indexed successfully, false otherwise - */ - def indexAsync[U <: Timestamped]( - entity: U, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit u: ClassTag[U], ec: ExecutionContext, formats: Formats): Future[Boolean] = { - val indexType = maybeType.getOrElse(u.runtimeClass.getSimpleName.toLowerCase) - indexAsync(index.getOrElse(indexType), entity.uuid, serialization.write[U](entity)) - } - - /** Index an entity in the given index asynchronously. - * @param index - * - the name of the index to index the entity in - * @param id - * - the id of the entity to index - * @param source - * - the source of the entity to index in JSON format - * @return - * a Future that completes with true if the entity was indexed successfully, false otherwise - */ - def indexAsync(index: String, id: String, source: String)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - Future { - this.index(index, id, source) - } - } -} - -trait UpdateApi { _: RefreshApi => - - /** Update an entity in the given index. - * @param entity - * - the entity to update - * @param index - * - the name of the index to update the entity in (default is the entity type name) - * @param maybeType - * - the type of the entity (default is the entity class name in lowercase) - * @param upsert - * - true to upsert the entity if it does not exist, false otherwise - * @return - * true if the entity was updated successfully, false otherwise - */ - def update[U <: Timestamped]( - entity: U, - index: Option[String] = None, - maybeType: Option[String] = None, - upsert: Boolean = true - )(implicit u: ClassTag[U], formats: Formats): Boolean = { - val indexType = maybeType.getOrElse(u.runtimeClass.getSimpleName.toLowerCase) - this.update( - index.getOrElse(indexType), - entity.uuid, - serialization.write[U](entity), - upsert - ) - } - - /** Update an entity in the given index. 
- * @param index - * - the name of the index to update the entity in - * @param id - * - the id of the entity to update - * @param source - * - the source of the entity to update in JSON format - * @param upsert - * - true to upsert the entity if it does not exist, false otherwise - * @return - * true if the entity was updated successfully, false otherwise - */ - def update(index: String, id: String, source: String, upsert: Boolean): Boolean - - /** Update an entity in the given index asynchronously. - * @param entity - * - the entity to update - * @param index - * - the name of the index to update the entity in (default is the entity type name) - * @param maybeType - * - the type of the entity (default is the entity class name in lowercase) - * @param upsert - * - true to upsert the entity if it does not exist, false otherwise - * @return - * a Future that completes with true if the entity was updated successfully, false otherwise - */ - def updateAsync[U <: Timestamped]( - entity: U, - index: Option[String] = None, - maybeType: Option[String] = None, - upsert: Boolean = true - )(implicit u: ClassTag[U], ec: ExecutionContext, formats: Formats): Future[Boolean] = { - val indexType = maybeType.getOrElse(u.runtimeClass.getSimpleName.toLowerCase) - this - .updateAsync( - index.getOrElse(indexType), - entity.uuid, - serialization.write[U](entity), - upsert - ) - } - - /** Update an entity in the given index asynchronously. - * @param index - * - the name of the index to update the entity in - * @param id - * - the id of the entity to update - * @param source - * - the source of the entity to update in JSON format - * @param upsert - * - true to upsert the entity if it does not exist, false otherwise - * @return - * a Future that completes with true if the entity was updated successfully, false otherwise - */ - def updateAsync(index: String, id: String, source: String, upsert: Boolean)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - Future { - this.update(index, id, source, upsert) - } - } -} - -trait DeleteApi { _: RefreshApi => - - /** Delete an entity from the given index. - * @param entity - * - the entity to delete - * @param index - * - the name of the index to delete the entity from (default is the entity type name) - * @param maybeType - * - the type of the entity (default is the entity class name in lowercase) - * @return - * true if the entity was deleted successfully, false otherwise - */ - def delete[U <: Timestamped]( - entity: U, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit u: ClassTag[U]): Boolean = { - val indexType = maybeType.getOrElse(u.runtimeClass.getSimpleName.toLowerCase) - delete(entity.uuid, index.getOrElse(indexType)) - } - - /** Delete an entity from the given index. - * @param uuid - * - the id of the entity to delete - * @param index - * - the name of the index to delete the entity from - * @return - * true if the entity was deleted successfully, false otherwise - */ - def delete(uuid: String, index: String): Boolean - - /** Delete an entity from the given index asynchronously. 
- * @param entity - * - the entity to delete - * @param index - * - the name of the index to delete the entity from (default is the entity type name) - * @param maybeType - * - the type of the entity (default is the entity class name in lowercase) - * @return - * a Future that completes with true if the entity was deleted successfully, false otherwise - */ - def deleteAsync[U <: Timestamped]( - entity: U, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit u: ClassTag[U], ec: ExecutionContext): Future[Boolean] = { - val indexType = maybeType.getOrElse(u.runtimeClass.getSimpleName.toLowerCase) - deleteAsync(entity.uuid, index.getOrElse(indexType)) - } - - /** Delete an entity from the given index asynchronously. - * @param uuid - * - the id of the entity to delete - * @param index - * - the name of the index to delete the entity from - * @return - * a Future that completes with true if the entity was deleted successfully, false otherwise - */ - def deleteAsync(uuid: String, index: String)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - Future { - this.delete(uuid, index) - } - } - -} - -trait BulkApi { _: RefreshApi with SettingsApi => - type A - type R - - def toBulkAction(bulkItem: BulkItem): A - - implicit def toBulkElasticAction(a: A): BulkElasticAction - - implicit def toBulkElasticResult(r: R): BulkElasticResult - - def bulk(implicit bulkOptions: BulkOptions, system: ActorSystem): Flow[Seq[A], R, NotUsed] - - def bulkResult: Flow[R, Set[String], NotUsed] - - /** +----------+ - * | | - * | Source | items: Iterator[D] - * | | - * +----------+ - * | - * v - * +----------+ - * | | - * |transform | BulkableAction - * | | - * +----------+ - * | - * v - * +----------+ - * | | - * | settings | Update elasticsearch settings (refresh and replicas) - * | | - * +----------+ - * | - * v - * +----------+ - * | | - * | group | - * | | - * +----------+ - * | - * v - * +----------+ +----------+ - * | |------->| | - * | balance | | bulk | - * | |------->| | - * +----------+ +----------+ - * | | - * | | - * | | - * +---------+ | | - * | |<-----------' | - * | merge | | - * | |<----------------' - * +---------+ - * | - * v - * +----------+ - * | | - * | result | BulkResult - * | | - * +----------+ - * | - * v - * +----------+ - * | | - * | Sink | indices: Set[String] - * | | - * +----------+ - * - * Asynchronously bulk items to Elasticsearch - * - * @param items the items for which a bulk has to be performed - * @param toDocument the function to transform items to elastic documents in json format - * @param idKey the key mapping to the document id - * @param suffixDateKey the key mapping to the date used to suffix the index - * @param suffixDatePattern the date pattern used to suffix the index - * @param update whether to upsert or not the items - * @param delete whether to delete or not the items - * @param parentIdKey the key mapping to the elastic parent document id - * @param bulkOptions bulk options - * @param system actor system - * @tparam D the type of the items - * @return the indexes on which the documents have been indexed - */ - def bulk[D]( - items: Iterator[D], - toDocument: D => String, - idKey: Option[String] = None, - suffixDateKey: Option[String] = None, - suffixDatePattern: Option[String] = None, - update: Option[Boolean] = None, - delete: Option[Boolean] = None, - parentIdKey: Option[String] = None - )(implicit bulkOptions: BulkOptions, system: ActorSystem): Set[String] = { - - implicit val materializer: Materializer = Materializer(system) - - 
import GraphDSL.Implicits._ - - val source = Source.fromIterator(() => items) - - val sink = Sink.fold[Set[String], Set[String]](Set.empty[String])(_ ++ _) - - val g = Flow.fromGraph(GraphDSL.create() { implicit b => - val transform = - b.add( - Flow[D].map(item => - toBulkAction( - toBulkItem( - toDocument, - idKey, - suffixDateKey, - suffixDatePattern, - update, - delete, - parentIdKey, - item - ) - ) - ) - ) - - val settings = b.add(BulkSettings[A](bulkOptions.disableRefresh)(this, toBulkElasticAction)) - - val group = b.add(Flow[A].named("group").grouped(bulkOptions.maxBulkSize).map { items => - // logger.info(s"Preparing to write batch of ${items.size}...") - items - }) - - val parallelism = Math.max(1, bulkOptions.balance) - - val bulkFlow: FlowShape[Seq[A], R] = b.add(bulk) - - val result = b.add(bulkResult) - - if (parallelism > 1) { - val balancer = b.add(Balance[Seq[A]](parallelism)) - - val merge = b.add(Merge[R](parallelism)) - - transform ~> settings ~> group ~> balancer - - 1 to parallelism foreach { _ => - balancer ~> bulkFlow ~> merge - } - - merge ~> result - } else { - transform ~> settings ~> group ~> bulkFlow ~> result - } - - FlowShape(transform.in, result.out) - }) - - val future = source.via(g).toMat(sink)(Keep.right).run() - - val indices = Await.result(future, Duration.Inf) - indices.foreach(refresh) - indices - } - - def toBulkItem[D]( - toDocument: D => String, - idKey: Option[String], - suffixDateKey: Option[String], - suffixDatePattern: Option[String], - update: Option[Boolean], - delete: Option[Boolean], - parentIdKey: Option[String], - item: D - )(implicit bulkOptions: BulkOptions): BulkItem = { - - implicit val formats: DefaultFormats = org.json4s.DefaultFormats - val document = toDocument(item) - val jsonMap = parse(document, useBigDecimalForDouble = false).extract[Map[String, Any]] - // extract id - val id = idKey.flatMap { i => - jsonMap.get(i).map(_.toString) - } - - // extract final index name - val index = suffixDateKey - .flatMap { s => - // Expecting a date field YYYY-MM-dd ... - jsonMap.get(s).map { d => - val strDate = d.toString.substring(0, 10) - val date = LocalDate.parse(strDate, DateTimeFormatter.ofPattern("yyyy-MM-dd")) - date.format( - suffixDatePattern - .map(DateTimeFormatter.ofPattern) - .getOrElse(DateTimeFormatter.ofPattern("yyyy-MM-dd")) - ) - } - } - .map(s => s"${bulkOptions.index}-$s") - // use suffix if available otherwise only index - .getOrElse(bulkOptions.index) - - // extract parent key - val parent = parentIdKey.flatMap { i => - jsonMap.get(i).map(_.toString) - } - - val action = delete match { - case Some(d) if d => BulkAction.DELETE - case _ => - update match { - case Some(u) if u => BulkAction.UPDATE - case _ => BulkAction.INDEX - } - } - - BulkItem(index, action, document, id, parent) - } - -} - -trait CountApi { - - /** Count the number of documents matching the given JSON query asynchronously. - * @param query - * - the query to count the documents for - * @return - * the number of documents matching the query, or None if the count could not be determined - */ - def countAsync(query: JSONQuery)(implicit ec: ExecutionContext): Future[Option[Double]] = { - Future { - this.count(query) - } - } - - /** Count the number of documents matching the given JSON query. 
- * @param query - * - the query to count the documents for - * @return - * the number of documents matching the query, or None if the count could not be determined - */ - def count(query: JSONQuery): Option[Double] - -} - -trait AggregateApi[T <: AggregateResult] { - - /** Aggregate the results of the given SQL query. - * @param sqlQuery - * - the query to aggregate the results for - * @return - * a sequence of aggregated results - */ - def aggregate(sqlQuery: SQLQuery)(implicit - ec: ExecutionContext - ): Future[_root_.scala.collection.Seq[T]] -} - -trait SingleValueAggregateApi extends AggregateApi[SingleValueAggregateResult] - -trait GetApi { - - /** Get an entity by its id from the given index. - * @param id - * - the id of the entity to get - * @param index - * - the name of the index to get the entity from (default is the entity type name) - * @param maybeType - * - the type of the entity (default is the entity class name in lowercase) - * @return - * an Option containing the entity if it was found, None otherwise - */ - def get[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], formats: Formats): Option[U] + def config: Config = ConfigFactory.load() - /** Get an entity by its id from the given index asynchronously. - * @param id - * - the id of the entity to get - * @param index - * - the name of the index to get the entity from (default is the entity type name) - * @param maybeType - * - the type of the entity (default is the entity class name in lowercase) - * @return - * a Future that completes with an Option containing the entity if it was found, None otherwise - */ - def getAsync[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[Option[U]] = { - Future { - this.get[U](id, index, maybeType) - } - } + final lazy val elasticConfig: ElasticConfig = ElasticConfig(config) } -trait SearchApi { - implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String - - implicit def sqlQueryToJSONQuery(sqlQuery: SQLQuery): JSONQuery = { - sqlQuery.request match { - case Some(Left(value)) => - JSONQuery(value.copy(score = sqlQuery.score), collection.immutable.Seq(value.sources: _*)) - case _ => - throw new IllegalArgumentException( - s"SQL query ${sqlQuery.query} does not contain a valid search request" - ) - } - } - - implicit def sqlQueryToJSONQueries(sqlQuery: SQLQuery): JSONQueries = { - sqlQuery.request match { - case Some(Right(value)) => - JSONQueries( - value.requests - .map(request => - JSONQuery( - request.copy(score = sqlQuery.score), - collection.immutable.Seq(request.sources: _*) - ) - ) - .toList - ) - case _ => - throw new IllegalArgumentException( - s"SQL query ${sqlQuery.query} does not contain a valid search request" - ) - } - } - - /** Search for entities matching the given JSON query. - * @param jsonQuery - * - the query to search for - * @param m - * - the manifest of the type to search for - * @param formats - * - the formats to use for serialization/deserialization - * @return - * a list of entities matching the query - */ - def search[U](jsonQuery: JSONQuery)(implicit m: Manifest[U], formats: Formats): List[U] - - /** Search for entities matching the given SQL query. 
- * @param sqlQuery - * - the SQL query to search for - * @param m - * - the manifest of the type to search for - * @param formats - * - the formats to use for serialization/deserialization - * @return - * a list of entities matching the query - */ - def search[U](sqlQuery: SQLQuery)(implicit m: Manifest[U], formats: Formats): List[U] = { - search[U](implicitly[JSONQuery](sqlQuery))(m, formats) - } - - /** Search for entities matching the given SQL query asynchronously. - * @param sqlQuery - * - the SQL query to search for - * @param m - * - the manifest of the type to search for - * @param formats - * - the formats to use for serialization/deserialization - * @return - * a Future that completes with a list of entities matching the query - */ - def searchAsync[U]( - sqlQuery: SQLQuery - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[List[U]] = Future( - this.search[U](sqlQuery) - ) - - /** Search for entities matching the given JSON query with inner hits. - * @param sqlQuery - * - the SQL query to search for - * @param innerField - * - the field to use for inner hits - * @param m1 - * - the manifest of the type to search for - * @param m2 - * - the manifest of the inner hit type - * @param formats - * - the formats to use for serialization/deserialization - * @return - * a list of tuples containing the main entity and a list of inner hits - */ - def searchWithInnerHits[U, I](sqlQuery: SQLQuery, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[(U, List[I])] = { - searchWithInnerHits[U, I](implicitly[JSONQuery](sqlQuery), innerField)(m1, m2, formats) - } - - /** Search for entities matching the given JSON query with inner hits. - * @param sqlQuery - * - the SQL query to search for - * @param innerField - * - the field to use for inner hits - * @param m1 - * - the manifest of the type to search for - * @param m2 - * - the manifest of the inner hit type - * @param formats - * - the formats to use for serialization/deserialization - * @return - * a list of tuples containing the main entity and a list of inner hits - */ - def searchWithInnerHits[U, I](jsonQuery: JSONQuery, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[(U, List[I])] - - /** Perform a multi-search operation with the given SQL query. - * @param sqlQuery - * - the SQL query to perform the multi-search for - * @param m - * - the manifest of the type to search for - * @param formats - * - the formats to use for serialization/deserialization - * @return - * a list of lists of entities matching the queries in the multi-search request - */ - def multiSearch[U]( - sqlQuery: SQLQuery - )(implicit m: Manifest[U], formats: Formats): List[List[U]] = { - multiSearch[U](implicitly[JSONQueries](sqlQuery))(m, formats) - } - - /** Perform a multi-search operation with the given JSON queries. - * @param jsonQueries - * - the JSON queries to perform the multi-search for - * @param m - * - the manifest of the type to search for - * @param formats - * - the formats to use for serialization/deserialization - * @return - * a list of lists of entities matching the queries in the multi-search request - */ - def multiSearch[U]( - jsonQueries: JSONQueries - )(implicit m: Manifest[U], formats: Formats): List[List[U]] - - /** Perform a multi-search operation with the given SQL query and inner hits. 
- * @param sqlQuery - * - the SQL query to perform the multi-search for - * @param innerField - * - the field to use for inner hits - * @param m1 - * - the manifest of the type to search for - * @param m2 - * - the manifest of the inner hit type - * @param formats - * - the formats to use for serialization/deserialization - * @return - * a list of lists of tuples containing the main entity and a list of inner hits - */ - def multiSearchWithInnerHits[U, I](sqlQuery: SQLQuery, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[List[(U, List[I])]] = { - multiSearchWithInnerHits[U, I](implicitly[JSONQueries](sqlQuery), innerField)(m1, m2, formats) - } - - /** Perform a multi-search operation with the given JSON queries and inner hits. - * @param jsonQueries - * - the JSON queries to perform the multi-search for - * @param innerField - * - the field to use for inner hits - * @param m1 - * - the manifest of the type to search for - * @param m2 - * - the manifest of the inner hit type - * @param formats - * - the formats to use for serialization/deserialization - * @return - * a list of lists of tuples containing the main entity and a list of inner hits - */ - def multiSearchWithInnerHits[U, I](jsonQueries: JSONQueries, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[List[(U, List[I])]] +trait SerializationApi { + implicit val serialization: Serialization.type = jackson.Serialization } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientCompanion.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientCompanion.scala new file mode 100644 index 00000000..5e7f7d2d --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientCompanion.scala @@ -0,0 +1,186 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package app.softnetwork.elastic.client
+
+import org.apache.http.HttpHost
+import org.slf4j.Logger
+
+import java.io.Closeable
+import java.net.URI
+import java.util.concurrent.atomic.AtomicInteger
+import scala.language.reflectiveCalls
+import scala.util.{Failure, Success, Try}
+
+trait ElasticClientCompanion[T <: Closeable] extends Closeable { _: { def logger: Logger } =>
+
+  def elasticConfig: ElasticConfig
+
+  private val failures = new AtomicInteger(0)
+
+  /** Thread-safe client instance using the double-checked locking pattern. The `@volatile`
+    * annotation ensures visibility across threads.
+    */
+  @volatile private var client: Option[T] = None
+
+  /** Lock object for synchronized initialization
+    */
+  private val lock = new Object()
+
+  /** Get or create the Elastic Client instance (thread-safe, lazy initialization). Uses
+    * double-checked locking for optimal performance.
+    *
+    * @return
+    *   Elastic Client instance
+    * @throws IllegalStateException
+    *   if client creation fails
+    */
+  def apply(): T = {
+    // First check (no locking) - fast path for already initialized client
+    client match {
+      case Some(c) => c
+      case None =>
+        // Second check with lock - slow path for initialization
+        lock.synchronized {
+          client match {
+            case Some(c) =>
+              c // Another thread initialized while we were waiting
+            case None =>
+              val c = createClient()
+              client = Some(c)
+              logger.info(s"Elasticsearch Client initialized for ${elasticConfig.credentials.url}")
+              c
+          }
+        }
+    }
+  }
+
+  /** Create and configure the Elasticsearch Client
+    */
+  protected def createClient(): T
+
+  /** Parse and validate HTTP host from URL string
+    * @throws IllegalArgumentException
+    *   if URL is invalid
+    */
+  protected def parseHttpHost(url: String): HttpHost = {
+    // Validate the URL
+    validateUrl(url) match {
+      case Success(_) =>
+        Try(HttpHost.create(url)) match {
+          case Success(host) =>
+            host
+          case Failure(ex) =>
+            logger.error(s"Failed to parse Elasticsearch URL: $url", ex)
+            throw new IllegalArgumentException(s"Invalid Elasticsearch URL: $url", ex)
+        }
+      case Failure(ex) =>
+        logger.error(s"Invalid Elasticsearch URL: $url", ex)
+        throw new IllegalArgumentException(s"Invalid Elasticsearch URL format: $url", ex)
+    }
+  }
+
+  /** Validate URL format using java.net.URI
+    */
+  private def validateUrl(url: String): Try[URI] = {
+    Try {
+      if (url == null || url.trim.isEmpty) {
+        throw new IllegalArgumentException("URL cannot be null or empty")
+      }
+
+      val uri = new URI(url)
+
+      // Check the scheme
+      if (uri.getScheme == null) {
+        throw new IllegalArgumentException(
+          s"URL must have a scheme (http:// or https://): $url"
+        )
+      }
+
+      val scheme = uri.getScheme.toLowerCase
+      if (scheme != "http" && scheme != "https") {
+        throw new IllegalArgumentException(
+          s"URL scheme must be http or https, got: $scheme"
+        )
+      }
+
+      // Check the host
+      if (uri.getHost == null || uri.getHost.trim.isEmpty) {
+        throw new IllegalArgumentException(
+          s"URL must have a valid hostname: $url"
+        )
+      }
+
+      // Check the port, if present
+      if (uri.getPort != -1) {
+        if (uri.getPort < 0 || uri.getPort > 65535) {
+          throw new IllegalArgumentException(
+            s"Invalid port number: ${uri.getPort} (must be between 0 and 65535)"
+          )
+        }
+      }
+
+      uri
+    }
+  }
+
+  /** Check if client is initialized and connected
+    */
+  def isInitialized: Boolean = client.isDefined
+
+  /** Test connection to Elasticsearch cluster
+    * @return
+    *   true if connection is successful
+    */
+  def testConnection(): Boolean
+
+  /** Close the client and release resources. Idempotent - safe to call multiple times.
+    */
+  override def close(): Unit = {
+    lock.synchronized {
+      client.foreach { c =>
+        Try {
+          c.close()
+          logger.info("Elasticsearch Client closed successfully")
+        }.recover { case ex: Exception =>
+          logger.warn(s"Error closing Elasticsearch Client: ${ex.getMessage}", ex)
+        }
+        client = None
+      }
+    }
+  }
+
+  /** Reset client (force reconnection on next access). Useful for connection recovery scenarios.
+    */
+  def reset(): Unit = {
+    logger.info("Resetting Elasticsearch Client")
+    close()
+    failures.set(0)
+  }
+
+  protected def incrementFailures(): Int = {
+    val nbFailures = failures.incrementAndGet()
+    if (nbFailures >= 5) {
+      reset()
+    }
+    nbFailures
+  }
+
+  def getFailures: Int = {
+    failures.get()
+  }
+}
diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientDelegator.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientDelegator.scala
new file mode 100644
index 00000000..1c4cdeff
--- /dev/null
+++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientDelegator.scala
@@ -0,0 +1,1325 @@
+/*
+ * Copyright 2025 SOFTNETWORK
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package app.softnetwork.elastic.client
+
+import akka.NotUsed
+import akka.actor.ActorSystem
+import akka.stream.scaladsl.{Flow, Source}
+import app.softnetwork.elastic.client.bulk._
+import app.softnetwork.elastic.client.result._
+import app.softnetwork.elastic.client.scroll._
+import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLQuery, SQLSearchRequest}
+import com.typesafe.config.Config
+import org.json4s.Formats
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.concurrent.{ExecutionContext, Future}
+import scala.language.implicitConversions
+import scala.reflect.{classTag, ClassTag}
+
+trait ElasticClientDelegator extends ElasticClientApi with BulkTypes {
+
+  def delegate: ElasticClientApi
+
+  // Dedicated logger for this delegator (the underlying client keeps its own)
+  protected lazy val logger: Logger = LoggerFactory getLogger getClass.getName
+
+  // Delegate config to the underlying client
+  override lazy val config: Config = delegate.config
+
+  // ==================== Closeable ====================
+
+  override def close(): Unit = delegate.close()
+
+  // ==================== VersionApi ====================
+
+  /** Get Elasticsearch version.
+    *
+    * @return
+    *   the Elasticsearch version
+    */
+  override def version: ElasticResult[String] =
+    delegate.version
+
+  override private[client] def executeVersion(): ElasticResult[String] =
+    delegate.executeVersion()
+
+  // ==================== IndicesApi ====================
+
+  /** Create an index with the provided name and settings.
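+    *
+    * For example (a sketch; the index name is illustrative, and the settings argument is
+    * assumed to default to `defaultSettings`):
+    * {{{
+    * createIndex("products-v2") match {
+    *   case ElasticSuccess(_)     => println("Index created")
+    *   case ElasticFailure(error) => println(s"Error: ${error.message}")
+    * }
+    * }}}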
+    *
+    * @param index
+    *   - the name of the index to create
+    * @param settings
+    *   - the settings to apply to the index (default is defaultSettings)
+    * @return
+    *   true if the index was created successfully, false otherwise
+    */
+  override def createIndex(index: String, settings: String): ElasticResult[Boolean] =
+    delegate.createIndex(index, settings)
+
+  /** Delete an index with the provided name.
+    *
+    * @param index
+    *   - the name of the index to delete
+    * @return
+    *   true if the index was deleted successfully, false otherwise
+    */
+  override def deleteIndex(index: String): ElasticResult[Boolean] =
+    delegate.deleteIndex(index)
+
+  /** Close an index with the provided name.
+    *
+    * @param index
+    *   - the name of the index to close
+    * @return
+    *   true if the index was closed successfully, false otherwise
+    */
+  override def closeIndex(index: String): ElasticResult[Boolean] =
+    delegate.closeIndex(index)
+
+  /** Open an index with the provided name.
+    *
+    * @param index
+    *   - the name of the index to open
+    * @return
+    *   true if the index was opened successfully, false otherwise
+    */
+  override def openIndex(index: String): ElasticResult[Boolean] =
+    delegate.openIndex(index)
+
+  /** Reindex from source index to target index.
+    *
+    * @param sourceIndex
+    *   - the name of the source index
+    * @param targetIndex
+    *   - the name of the target index
+    * @param refresh
+    *   - true to refresh the target index after reindexing, false otherwise
+    * @return
+    *   true and the number of documents reindexed if the reindexing was successful, false
+    *   otherwise
+    */
+  override def reindex(
+    sourceIndex: String,
+    targetIndex: String,
+    refresh: Boolean
+  ): ElasticResult[(Boolean, Option[Long])] =
+    delegate.reindex(sourceIndex, targetIndex, refresh)
+
+  /** Check if an index exists.
+    *
+    * @param index
+    *   - the name of the index to check
+    * @return
+    *   true if the index exists, false otherwise
+    */
+  override def indexExists(index: String): ElasticResult[Boolean] =
+    delegate.indexExists(index)
+
+  override private[client] def executeCreateIndex(
+    index: String,
+    settings: String
+  ): ElasticResult[Boolean] =
+    delegate.createIndex(index, settings)
+
+  override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] =
+    delegate.deleteIndex(index)
+
+  override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] =
+    delegate.closeIndex(index)
+
+  override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] =
+    delegate.openIndex(index)
+
+  override private[client] def executeReindex(
+    sourceIndex: String,
+    targetIndex: String,
+    refresh: Boolean
+  ): ElasticResult[(Boolean, Option[Long])] =
+    delegate.reindex(sourceIndex, targetIndex, refresh)
+
+  override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] =
+    delegate.indexExists(index)
+
+  // ==================== AliasApi ====================
+
+  /** Add an alias to an index.
+    *
+    * This operation:
+    *   1. Validates the index and alias names 2. Checks that the index exists 3.
Adds the alias + * + * @param index + * the index name + * @param alias + * the alias name to add + * @return + * ElasticSuccess(true) if added, ElasticFailure otherwise + * @example + * {{{ + * addAlias("my-index-2024", "my-index-current") match { + * case ElasticSuccess(_) => println("Alias added") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * }}} + * @note + * An alias can point to multiple indexes (useful for searches) + * @note + * An index can have multiple aliases + */ + override def addAlias(index: String, alias: String): ElasticResult[Boolean] = + delegate.addAlias(index, alias) + + /** Remove an alias from an index. + * + * @param index + * the name of the index + * @param alias + * the name of the alias to remove + * @return + * ElasticSuccess(true) if removed, ElasticFailure otherwise + * @example + * {{{ + * removeAlias("my-index-2024", "my-index-current") match { + * case ElasticSuccess(_) => println("Alias removed") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * }}} + * @note + * If the alias does not exist, Elasticsearch returns a 404 error + */ + override def removeAlias(index: String, alias: String): ElasticResult[Boolean] = + delegate.removeAlias(index, alias) + + /** Check if an alias exists. + * + * @param alias + * the name of the alias to check + * @return + * ElasticSuccess(true) if it exists, ElasticSuccess(false) otherwise, ElasticFailure in case + * of error + * @example + * {{{ + * aliasExists("my-alias") match { + * case ElasticSuccess(true) => println("Alias exists") + * case ElasticSuccess(false) => println("Alias does not exist") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * }}} + */ + override def aliasExists(alias: String): ElasticResult[Boolean] = + delegate.aliasExists(alias) + + /** Retrieve all aliases from an index. + * + * @param index + * the index name + * @return + * ElasticResult with the list of aliases + * @example + * {{{ + * getAliases("my-index") match { + * case ElasticSuccess(aliases) => println(s"Aliases: ${aliases.mkString(", ")}") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * + * }}} + */ + override def getAliases(index: String): ElasticResult[Set[String]] = + delegate.getAliases(index) + + /** Atomic swap of an alias between two indexes. + * + * This operation is atomic: the alias is removed from oldIndex and added to newIndex in a single + * query, thus avoiding any period when the alias does not exist. This is the recommended + * operation for zero-downtime deployments. 
+ * + * @param oldIndex + * the current index pointed to by the alias + * @param newIndex + * the new index that should point to the alias + * @param alias + * the name of the alias to swap + * @return + * ElasticSuccess(true) if swapped, ElasticFailure otherwise + * @example + * {{{ + * // Zero-downtime deployment + * swapAlias(oldIndex = "products-v1", newIndex = "products-v2", alias = "products") match { + * case ElasticSuccess(_) => println("✅ Alias swapped, new version deployed") + * case ElasticFailure(error) => println(s"❌ Error: ${error.message}") + * } + * }}} + * @note + * This operation is atomic and therefore preferable to removeAlias + addAlias + */ + override def swapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): ElasticResult[Boolean] = + delegate.swapAlias(oldIndex, newIndex, alias) + + override private[client] def executeAddAlias( + index: String, + alias: String + ): ElasticResult[Boolean] = + delegate.addAlias(index, alias) + + override private[client] def executeRemoveAlias( + index: String, + alias: String + ): ElasticResult[Boolean] = + delegate.removeAlias(index, alias) + + override private[client] def executeAliasExists(alias: String): ElasticResult[Boolean] = + delegate.aliasExists(alias) + + override private[client] def executeGetAliases(index: String): ElasticResult[String] = + delegate.executeGetAliases(index) + + override private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): ElasticResult[Boolean] = + delegate.swapAlias(oldIndex, newIndex, alias) + + // ==================== SettingsApi ==================== + + /** Toggle the refresh interval of an index. + * + * @param index + * - the name of the index + * @param enable + * - true to enable the refresh interval, false to disable it + * @return + * true if the settings were updated successfully, false otherwise + */ + override def toggleRefresh(index: String, enable: Boolean): ElasticResult[Boolean] = + delegate.toggleRefresh(index, enable) + + /** Set the number of replicas for an index. + * + * @param index + * - the name of the index + * @param replicas + * - the number of replicas to set + * @return + * true if the settings were updated successfully, false otherwise + */ + override def setReplicas(index: String, replicas: Int): ElasticResult[Boolean] = + delegate.setReplicas(index, replicas) + + /** Update index settings. + * + * @param index + * - the name of the index + * @param settings + * - the settings to apply to the index (default is defaultSettings) + * @return + * true if the settings were updated successfully, false otherwise + */ + override def updateSettings(index: String, settings: String): ElasticResult[Boolean] = + delegate.updateSettings(index, settings) + + /** Load the settings of an index. + * + * @param index + * - the name of the index to load the settings for + * @return + * the settings of the index as a JSON string + */ + override def loadSettings(index: String): ElasticResult[String] = + delegate.loadSettings(index) + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = + delegate.updateSettings(index, settings) + + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = { + delegate.loadSettings(index) + } + + // ==================== MappingApi ==================== + + /** Set the mapping of an index. 
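+    *
+    * For example (a sketch; the mapping JSON is illustrative):
+    * {{{
+    * val mapping = """{"properties": {"name": {"type": "keyword"}}}"""
+    * setMapping("products", mapping) match {
+    *   case ElasticSuccess(_)     => println("Mapping set")
+    *   case ElasticFailure(error) => println(s"Error: ${error.message}")
+    * }
+    * }}}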
+ * + * @param index + * - the name of the index to set the mapping for + * @param mapping + * - the mapping to set on the index + * @return + * true if the mapping was set successfully, false otherwise + */ + override def setMapping(index: String, mapping: String): ElasticResult[Boolean] = + delegate.setMapping(index, mapping) + + /** Get the mapping of an index. + * + * @param index + * - the name of the index to get the mapping for + * @return + * the mapping of the index as a JSON string + */ + override def getMapping(index: String): ElasticResult[String] = + delegate.getMapping(index) + + /** Get the mapping properties of an index. + * + * @param index + * - the name of the index to get the mapping properties for + * @return + * the mapping properties of the index as a JSON string + */ + override def getMappingProperties(index: String): ElasticResult[String] = + delegate.getMappingProperties(index) + + /** Check if the mapping of an index is different from the provided mapping. + * + * @param index + * - the name of the index to check + * @param mapping + * - the mapping to compare with the current mapping of the index + * @return + * true if the mapping is different, false otherwise + */ + override def shouldUpdateMapping(index: String, mapping: String): ElasticResult[Boolean] = + delegate.shouldUpdateMapping(index, mapping) + + /** Update the mapping of an index to a new mapping. + * + * This method handles three scenarios: + * 1. Index doesn't exist: Create it with the new mapping 2. Index exists but mapping is + * outdated: Migrate to new mapping 3. Index exists and mapping is current: Do nothing + * + * @param index + * - the name of the index to migrate + * @param mapping + * - the new mapping to set on the index + * @param settings + * - the settings to apply to the index (default is defaultSettings) + * @return + * true if the mapping was created or updated successfully, false otherwise + */ + override def updateMapping( + index: String, + mapping: String, + settings: String + ): ElasticResult[Boolean] = + delegate.updateMapping(index, mapping, settings) + + override private[client] def executeSetMapping( + index: String, + mapping: String + ): ElasticResult[Boolean] = + delegate.setMapping(index, mapping) + + override private[client] def executeGetMapping(index: String): ElasticResult[String] = { + delegate.getMapping(index) + } + + // ==================== RefreshApi ==================== + + /** Refresh the index to make sure all documents are indexed and searchable. + * + * @param index + * - the name of the index to refresh + * @return + * true if the index was refreshed successfully, false otherwise + */ + override def refresh(index: String): ElasticResult[Boolean] = delegate.refresh(index) + + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = + delegate.executeRefresh(index) + + // ==================== FlushApi ==================== + + /** Flush the index to make sure all operations are written to disk. 
+ * + * @param index + * - the name of the index to flush + * @param force + * - true to force the flush, false otherwise + * @param wait + * - true to wait for the flush to complete, false otherwise + * @return + * true if the index was flushed successfully, false otherwise + */ + override def flush(index: String, force: Boolean, wait: Boolean): ElasticResult[Boolean] = + delegate.flush(index, force, wait) + + override private[client] def executeFlush( + index: String, + force: Boolean, + wait: Boolean + ): ElasticResult[Boolean] = + delegate.executeFlush(index, force, wait) + + // ==================== IndexApi ==================== + + /** Index an entity in the given index. + * + * @param entity + * - the entity to index + * @param id + * - the id of the entity to index + * @param index + * - the name of the index to index the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * true if the entity was indexed successfully, false otherwise + */ + override def indexAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String], + maybeType: Option[String] + )(implicit u: ClassTag[U], formats: Formats): ElasticResult[Boolean] = + delegate.indexAs(entity, id, index, maybeType) + + /** Index an entity in the given index. + * + * @param index + * - the name of the index to index the entity in + * @param id + * - the id of the entity to index + * @param source + * - the source of the entity to index in JSON format + * @return + * true if the entity was indexed successfully, false otherwise + */ + override def index(index: String, id: String, source: String): ElasticResult[Boolean] = + delegate.index(index, id, source) + + /** Index an entity in the given index asynchronously. + * + * @param entity + * - the entity to index + * @param id + * - the id of the entity to index + * @param index + * - the name of the index to index the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * a Future that completes with true if the entity was indexed successfully, false otherwise + */ + override def indexAsyncAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String], + maybeType: Option[String] + )(implicit + u: ClassTag[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Boolean]] = + delegate.indexAsyncAs(entity, id, index, maybeType) + + /** Index an entity in the given index asynchronously. + * + * @param index + * - the name of the index to index the entity in + * @param id + * - the id of the entity to index + * @param source + * - the source of the entity to index in JSON format + * @return + * a Future that completes with true if the entity was indexed successfully, false otherwise + */ + override def indexAsync(index: String, id: String, source: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = delegate.indexAsync(index, id, source) + override private[client] def executeIndex( + index: String, + id: String, + source: String + ): ElasticResult[Boolean] = + delegate.executeIndex(index, id, source) + + override private[client] def executeIndexAsync( + index: String, + id: String, + source: String + )(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = + delegate.executeIndexAsync(index, id, source) + + // ==================== UpdateApi ==================== + + /** Update an entity in the given index. 
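+    *
+    * A minimal usage sketch (hypothetical index, id and document):
+    * {{{
+    * update("my-index", "42", """{"name": "updated"}""", upsert = true) match {
+    *   case ElasticSuccess(_) => println("Document updated")
+    *   case ElasticFailure(error) => println(s"Error: ${error.message}")
+    * }
+    * }}}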
+ * + * @param index + * - the name of the index to update the entity in + * @param id + * - the id of the entity to update + * @param source + * - the source of the entity to update in JSON format + * @param upsert + * - true to upsert the entity if it does not exist, false otherwise + * @return + * true if the entity was updated successfully, false otherwise + */ + override def update( + index: String, + id: String, + source: String, + upsert: Boolean + ): ElasticResult[Boolean] = + delegate.update(index, id, source, upsert) + + /** Update an entity in the given index. + * + * @param entity + * - the entity to update + * @param id + * - the id of the entity to update + * @param index + * - the name of the index to update the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @param upsert + * - true to upsert the entity if it does not exist, false otherwise + * @return + * true if the entity was updated successfully, false otherwise + */ + override def updateAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String], + maybeType: Option[String], + upsert: Boolean + )(implicit u: ClassTag[U], formats: Formats): ElasticResult[Boolean] = + delegate.updateAs(entity, id, index, maybeType, upsert) + + /** Update an entity in the given index asynchronously. + * + * @param index + * - the name of the index to update the entity in + * @param id + * - the id of the entity to update + * @param source + * - the source of the entity to update in JSON format + * @param upsert + * - true to upsert the entity if it does not exist, false otherwise + * @return + * a Future that completes with true if the entity was updated successfully, false otherwise + */ + override def updateAsync(index: String, id: String, source: String, upsert: Boolean)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = + delegate.updateAsync(index, id, source, upsert) + + /** Update an entity in the given index asynchronously. + * + * @param entity + * - the entity to update + * @param id + * - the id of the entity to update + * @param index + * - the name of the index to update the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @param upsert + * - true to upsert the entity if it does not exist, false otherwise + * @return + * a Future that completes with true if the entity was updated successfully, false otherwise + */ + override def updateAsyncAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String], + maybeType: Option[String], + upsert: Boolean + )(implicit + u: ClassTag[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Boolean]] = + delegate.updateAsyncAs(entity, id, index, maybeType, upsert) + + override private[client] def executeUpdate( + index: String, + id: String, + source: String, + upsert: Boolean + ): ElasticResult[Boolean] = + delegate.executeUpdate(index, id, source, upsert) + + override private[client] def executeUpdateAsync( + index: String, + id: String, + source: String, + upsert: Boolean + )(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = + delegate.executeUpdateAsync(index, id, source, upsert) + + // ==================== DeleteApi ==================== + + /** Delete an entity from the given index. 
+ * + * @param id + * - the id of the entity to delete + * @param index + * - the name of the index to delete the entity from + * @return + * true if the entity was deleted successfully, false otherwise + */ + override def delete(id: String, index: String): ElasticResult[Boolean] = + delegate.delete(id, index) + + /** Delete an entity from the given index asynchronously. + * + * @param id + * - the id of the entity to delete + * @param index + * - the name of the index to delete the entity from + * @return + * a Future that completes with true if the entity was deleted successfully, false otherwise + */ + override def deleteAsync(id: String, index: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = + delegate.deleteAsync(id, index) + + override private[client] def executeDelete( + index: String, + id: String + ): ElasticResult[Boolean] = + delegate.executeDelete(index, id) + + override private[client] def executeDeleteAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = + delegate.executeDeleteAsync(index, id) + + // ==================== GetApi ==================== + + /** Get a document by its id from the given index. + * + * @param id + * - the id of the document to get + * @param index + * - the name of the index to get the document from + * @return + * an Option containing the document as a JSON string if it was found, None otherwise + */ + override def get(id: String, index: String): ElasticResult[Option[String]] = + delegate.get(id, index) + + /** Get an entity by its id from the given index. + * + * @param id + * - the id of the entity to get + * @param index + * - the name of the index to get the entity from (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * an Option containing the entity if it was found, None otherwise + */ + override def getAs[U <: AnyRef](id: String, index: Option[String], maybeType: Option[String])( + implicit + m: Manifest[U], + formats: Formats + ): ElasticResult[Option[U]] = + delegate.getAs(id, index, maybeType) + + /** Get a document by its id from the given index asynchronously. + * + * @param id + * - the id of the document to get + * @param index + * - the name of the index to get the document from + * @return + * a Future that completes with an Option containing the document as a JSON string if it was + * found, None otherwise + */ + override def getAsync(id: String, index: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[String]]] = + delegate.getAsync(id, index) + + /** Get an entity by its id from the given index asynchronously. 
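+    *
+    * A minimal usage sketch (Person is a hypothetical case class; implicit ExecutionContext and
+    * Formats are assumed to be in scope):
+    * {{{
+    * getAsyncAs[Person]("42", Some("people"), None).map {
+    *   case ElasticSuccess(Some(person)) => println(s"Found: $person")
+    *   case ElasticSuccess(None) => println("Not found")
+    *   case ElasticFailure(error) => println(s"Error: ${error.message}")
+    * }
+    * }}}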
+ * + * @param id + * - the id of the entity to get + * @param index + * - the name of the index to get the entity from (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * a Future that completes with an Option containing the entity if it was found, None otherwise + */ + override def getAsyncAs[U <: AnyRef]( + id: String, + index: Option[String], + maybeType: Option[String] + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Option[U]]] = + delegate.getAsyncAs(id, index, maybeType) + + override private[client] def executeGet( + index: String, + id: String + ): ElasticResult[Option[String]] = + delegate.executeGet(index, id) + + override private[client] def executeGetAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[String]]] = + delegate.executeGetAsync(index, id) + + // ==================== CountApi ==================== + + /** Count the number of documents matching the given JSON query. + * + * @param query + * - the query to count the documents for + * @return + * the number of documents matching the query, or None if the count could not be determined + */ + override def count(query: ElasticQuery): ElasticResult[Option[Double]] = + delegate.count(query) + + /** Count the number of documents matching the given JSON query asynchronously. + * + * @param query + * - the query to count the documents for + * @return + * the number of documents matching the query, or None if the count could not be determined + */ + override def countAsync(query: ElasticQuery)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[Double]]] = + delegate.countAsync(query) + + override private[client] def executeCount(query: ElasticQuery): ElasticResult[Option[Double]] = + delegate.executeCount(query) + + override private[client] def executeCountAsync(query: ElasticQuery)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[Double]]] = + delegate.executeCountAsync(query) + + // ==================== AggregateApi ================= + + /** Aggregate the results of the given SQL query. + * + * @param sqlQuery + * - the query to aggregate the results for + * @return + * a sequence of aggregated results + */ + override def aggregate(sqlQuery: SQLQuery)(implicit + ec: ExecutionContext + ): Future[ElasticResult[collection.Seq[SingleValueAggregateResult]]] = + delegate.aggregate(sqlQuery) + + // ==================== SearchApi ==================== + + /** Search for documents / aggregations matching the SQL query. + * + * @param sql + * the SQL query to execute + * @return + * the Elasticsearch response + */ + override def search(sql: SQLQuery): ElasticResult[ElasticResponse] = delegate.search(sql) + + /** Search for documents / aggregations matching the Elasticsearch query. + * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @return + * the Elasticsearch response + */ + override def singleSearch( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): ElasticResult[ElasticResponse] = + delegate.singleSearch(elasticQuery, fieldAliases, aggregations) + + /** Multi-search with Elasticsearch queries. 
+ * + * @param elasticQueries + * Elasticsearch queries + * @param fieldAliases + * field aliases + * @param aggregations + * SQL aggregations + * @return + * the combined Elasticsearch response + */ + override def multiSearch( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): ElasticResult[ElasticResponse] = + delegate.multiSearch(elasticQueries, fieldAliases, aggregations) + + /** Asynchronous search for documents / aggregations matching the SQL query. + * + * @param sqlQuery + * the SQL query + * @return + * a Future containing the Elasticsearch response + */ + override def searchAsync(sqlQuery: SQLQuery)(implicit + ec: ExecutionContext + ): Future[ElasticResult[ElasticResponse]] = delegate.searchAsync(sqlQuery) + + /** Asynchronous search for documents / aggregations matching the Elasticsearch query. + * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @return + * a Future containing the Elasticsearch response + */ + override def singleSearchAsync( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit ec: ExecutionContext): Future[ElasticResult[ElasticResponse]] = + delegate.singleSearchAsync(elasticQuery, fieldAliases, aggregations) + + /** Asynchronous multi-search with Elasticsearch queries. + * + * @param elasticQueries + * the Elasticsearch queries + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @return + * a Future containing the combined Elasticsearch response + */ + override def multiSearchAsync( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit ec: ExecutionContext): Future[ElasticResult[ElasticResponse]] = + delegate.multiSearchAsync(elasticQueries, fieldAliases, aggregations) + + /** Searches and converts results into typed entities from an SQL query. + * + * @param sqlQuery + * the SQL query containing fieldAliases and aggregations + * @tparam U + * the type of entities to return + * @return + * the entities matching the query + */ + override def searchAs[U]( + sqlQuery: SQLQuery + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] = delegate.searchAs(sqlQuery) + + /** Searches and converts results into typed entities. + * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * the entities matching the query + */ + override def singleSearchAs[U]( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] = + delegate.singleSearchAs(elasticQuery, fieldAliases, aggregations) + + /** Multi-search with conversion to typed entities. 
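+    *
+    * A minimal sketch (Person is a hypothetical case class; implicit Formats in scope and
+    * `queries` holding previously built ElasticQueries):
+    * {{{
+    * multisearchAs[Person](queries, fieldAliases = Map.empty, aggregations = Map.empty) match {
+    *   case ElasticSuccess(people) => people.foreach(println)
+    *   case ElasticFailure(error) => println(s"Error: ${error.message}")
+    * }
+    * }}}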
+ * + * @param elasticQueries + * the Elasticsearch queries + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * the entities matching the queries + */ + override def multisearchAs[U]( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] = + delegate.multisearchAs(elasticQueries, fieldAliases, aggregations) + + /** Asynchronous search with conversion to typed entities. + * + * @param sqlQuery + * the SQL query + * @tparam U + * the type of entities to return + * @return + * a Future containing the entities + */ + override def searchAsyncAs[U](sqlQuery: SQLQuery)(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Seq[U]]] = delegate.searchAsyncAs(sqlQuery) + + /** Asynchronous search with conversion to typed entities. + * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * a Future containing the entities + */ + override def singleSearchAsyncAs[U]( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Seq[U]]] = + delegate.singleSearchAsyncAs(elasticQuery, fieldAliases, aggregations) + + /** Asynchronous multi-search with conversion to typed entities. + * + * @param elasticQueries + * the Elasticsearch queries + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * a Future containing the entities + */ + override def multiSearchAsyncAs[U]( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Seq[U]]] = + delegate.multiSearchAsyncAs(elasticQueries, fieldAliases, aggregations) + + override def searchWithInnerHits[U: Manifest: ClassTag, I: Manifest: ClassTag]( + sql: SQLQuery, + innerField: String + )(implicit + formats: Formats + ): ElasticResult[Seq[(U, Seq[I])]] = + delegate.searchWithInnerHits[U, I](sql, innerField) + + override def singleSearchWithInnerHits[U: Manifest: ClassTag, I: Manifest: ClassTag]( + elasticQuery: ElasticQuery, + innerField: String + )(implicit formats: Formats): ElasticResult[Seq[(U, Seq[I])]] = + delegate.singleSearchWithInnerHits[U, I](elasticQuery, innerField) + + override def multisearchWithInnerHits[U: Manifest: ClassTag, I: Manifest: ClassTag]( + elasticQueries: ElasticQueries, + innerField: String + )(implicit formats: Formats): ElasticResult[Seq[(U, Seq[I])]] = + delegate.multisearchWithInnerHits[U, I](elasticQueries, innerField) + + override private[client] implicit def sqlSearchRequestToJsonQuery( + sqlSearch: SQLSearchRequest + ): String = + delegate.sqlSearchRequestToJsonQuery(sqlSearch) + + override private[client] def executeSingleSearch( + elasticQuery: ElasticQuery + ): ElasticResult[Option[String]] = + delegate.executeSingleSearch(elasticQuery) + + override private[client] def executeMultiSearch( + elasticQueries: ElasticQueries + ): ElasticResult[Option[String]] = + 
delegate.executeMultiSearch(elasticQueries) + + override private[client] def executeSingleSearchAsync(elasticQuery: ElasticQuery)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[String]]] = + delegate.executeSingleSearchAsync(elasticQuery) + + override private[client] def executeMultiSearchAsync(elasticQueries: ElasticQueries)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[String]]] = + delegate.executeMultiSearchAsync(elasticQueries) + + // ==================== ScrollApi ==================== + + /** Create a scrolling source with automatic strategy selection + */ + override def scroll(sql: SQLQuery, config: ScrollConfig)(implicit + system: ActorSystem + ): Source[(Map[String, Any], ScrollMetrics), NotUsed] = delegate.scroll(sql, config) + + /** Typed scroll source + */ + override def scrollAs[T](sql: SQLQuery, config: ScrollConfig)(implicit + system: ActorSystem, + m: Manifest[T], + formats: Formats + ): Source[(T, ScrollMetrics), NotUsed] = delegate.scrollAs(sql, config) + + override private[client] def scrollClassic( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation], + config: ScrollConfig + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + delegate.scrollClassic(elasticQuery, fieldAliases, aggregations, config) + } + + override private[client] def searchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + delegate.searchAfter(elasticQuery, fieldAliases, config, hasSorts) + } + + override private[client] def pitSearchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[JSONResults, JSONResults], + config: ScrollConfig, + hasSorts: Boolean + )(implicit system: ActorSystem) = { + delegate.pitSearchAfter(elasticQuery, fieldAliases, config, hasSorts) + } + + // ==================== BulkApi ==================== + + /** Bulk with detailed results (successes + failures). + * + * This method provides: + * + * - List of successfully indexed documents + * - List of failed documents with error details + * - Performance metrics + * - Configurable automatic retry + * + * @param items + * : documents to index + * @param toDocument + * : JSON transformation function + * @param indexKey + * : key for the index field + * @param idKey + * : key for the id field + * @param suffixDateKey + * : key for the date field to suffix the index + * @param suffixDatePattern + * : date pattern for the suffix + * @param update + * : true for upsert, false for index + * @param delete + * : true for delete + * @param parentIdKey + * : key for the parent field + * @param callbacks + * : callbacks for events + * @param bulkOptions + * : configuration options + * @return + * Future with detailed results + */ + override def bulkWithResult[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String], + idKey: Option[String], + suffixDateKey: Option[String], + suffixDatePattern: Option[String], + update: Option[Boolean], + delete: Option[Boolean], + parentIdKey: Option[String], + callbacks: BulkCallbacks + )(implicit bulkOptions: BulkOptions, system: ActorSystem): Future[BulkResult] = + delegate.bulkWithResult( + items, + toDocument, + indexKey, + idKey, + suffixDateKey, + suffixDatePattern, + update, + delete, + parentIdKey, + callbacks + ) + + /** Source: Akka Streams, which provides real-time results. 
+ * + * Each emitted item is either: + * - Right(id) for success + * - Left(failed) for failure + * + * @example + * {{{ + * bulkSource(items, toDocument) + * .runWith(Sink.foreach { + * case Right(id) => println(s"✅ Success: $id") + * case Left(failed) => println(s"❌ Failed: ${failed.id}") + * }) + * }}} + * @param items + * the documents to index + * @param toDocument + * JSON transformation function + * @param indexKey + * key for the index field + * @param idKey + * key for the id field + * @param suffixDateKey + * date field key to suffix the index + * @param suffixDatePattern + * date pattern for the suffix + * @param update + * true for upsert, false for index + * @param delete + * true to delete + * @param parentIdKey + * parent field key + * @param bulkOptions + * configuration options + * @return + * Source outputting Right(id) or Left(failed) + */ + override def bulkSource[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String], + idKey: Option[String], + suffixDateKey: Option[String], + suffixDatePattern: Option[String], + update: Option[Boolean], + delete: Option[Boolean], + parentIdKey: Option[String] + )(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Source[Either[FailedDocument, SuccessfulDocument], NotUsed] = delegate.bulkSource( + items, + toDocument, + indexKey, + idKey, + suffixDateKey, + suffixDatePattern, + update, + delete, + parentIdKey + ) + + /** Backward compatible API (old signature). + * + * @deprecated + * Use `bulkWithResult` to get failure details + */ + override def bulk[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String], + idKey: Option[String], + suffixDateKey: Option[String], + suffixDatePattern: Option[String], + update: Option[Boolean], + delete: Option[Boolean], + parentIdKey: Option[String] + )(implicit bulkOptions: BulkOptions, system: ActorSystem): ElasticResult[BulkResult] = delegate + .bulk( + items, + toDocument, + indexKey, + idKey, + suffixDateKey, + suffixDatePattern, + update, + delete, + parentIdKey + ) + + override private[client] def toBulkAction(bulkItem: BulkItem): BulkActionType = + delegate.toBulkAction(bulkItem).asInstanceOf[BulkActionType] + + override private[client] implicit def toBulkElasticAction(a: BulkActionType): BulkElasticAction = + delegate.toBulkElasticAction(a.asInstanceOf) + + /** Basic flow for executing a bulk action. This method must be implemented by concrete classes + * depending on the Elasticsearch version and client used. + * + * @param bulkOptions + * configuration options + * @return + * Flow transforming bulk actions into results + */ + override private[client] def bulkFlow(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Flow[Seq[BulkActionType], BulkResultType, NotUsed] = + delegate.bulkFlow(bulkOptions, system).asInstanceOf + + /** Convert a BulkResultType into individual results. This method must extract the successes and + * failures from the ES response. 
+ * + * @param result + * raw result from the bulk + * @return + * sequence of Right(id) for success or Left(failed) for failure + */ + override private[client] def extractBulkResults( + result: BulkResultType, + originalBatch: Seq[BulkItem] + ): Seq[Either[FailedDocument, SuccessfulDocument]] = + delegate.extractBulkResults(result.asInstanceOf, originalBatch) + + /** Conversion BulkActionType -> BulkItem */ + override private[client] def actionToBulkItem(action: BulkActionType): BulkItem = + delegate.actionToBulkItem(action.asInstanceOf) +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientHelpers.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientHelpers.scala new file mode 100644 index 00000000..5e0ec60e --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticClientHelpers.scala @@ -0,0 +1,222 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.ElasticError +import org.slf4j.Logger + +import scala.language.reflectiveCalls +import scala.util.{Failure, Success, Try} + +trait ElasticClientHelpers { + + protected def logger: Logger + + /** Validate the name of an index. Elasticsearch rules: + * - Not empty + * - Lowercase only + * - No characters: \, /, *, ?, ", <, >, |, space, comma, # + * - No colon (:) except for system indexes + * - Does not start with -, _, + + * - Is not . or .. + * @param index + * name of the index to validate + * @return + * Some(ElasticError) if invalid, None if valid + */ + protected def validateIndexName(index: String): Option[ElasticError] = { + if (index == null || index.trim.isEmpty) { + return Some( + ElasticError( + message = "Index name cannot be empty", + cause = None, + statusCode = Some(400), + operation = Some("validateIndexName") + ) + ) + } + + val trimmed = index.trim + + // ✅ Elasticsearch rules + if (trimmed == "." || trimmed == "..") { + return Some( + ElasticError( + message = s"Index name cannot be '.' 
or '..'", + cause = None, + statusCode = Some(400), + operation = Some("validateIndexName") + ) + ) + } + + if (trimmed.startsWith("-") || trimmed.startsWith("_") || trimmed.startsWith("+")) { + return Some( + ElasticError( + message = s"Index name cannot start with '-', '_', or '+'", + cause = None, + statusCode = Some(400), + operation = Some("validateIndexName") + ) + ) + } + + if (trimmed != trimmed.toLowerCase) { + return Some( + ElasticError( + message = s"Index name must be lowercase", + cause = None, + statusCode = Some(400), + operation = Some("validateIndexName") + ) + ) + } + + val invalidChars = """[\\/*?"<>| ,#]""".r + if (invalidChars.findFirstIn(trimmed).isDefined) { + return Some( + ElasticError( + message = + s"Index name contains invalid characters: \\, /, *, ?, \", <, >, |, space, comma, #", + cause = None, + statusCode = Some(400), + operation = Some("validateIndexName") + ) + ) + } + + if (trimmed.length > 255) { + return Some( + ElasticError( + message = s"Index name is too long (max 255 characters): ${trimmed.length}", + cause = None, + statusCode = Some(400), + operation = Some("validateIndexName") + ) + ) + } + + None // Valid + } + + /** Validate the JSON. + * @param operation + * name of the operation + * @param jsonString + * the JSON to validate + * @return + * Some(ElasticError) if invalid, None if valid + */ + protected def validateJson( + operation: String = "validateJson", + jsonString: String + ): Option[ElasticError] = { + if (jsonString == null || jsonString.trim.isEmpty) { + return Some( + ElasticError( + message = "Settings cannot be empty", + cause = None, + statusCode = Some(400), + operation = Some(operation) + ) + ) + } + + val trimmed = jsonString.trim + if (trimmed.contains("//") || trimmed.contains("/*")) { + return Some( + ElasticError( + message = "Invalid JSON: Comments are not allowed in JSON", + cause = None, + statusCode = Some(400), + operation = Some(operation) + ) + ) + } + + // ✅ Basic JSON validation + Try { + import org.json4s.jackson.JsonMethods._ + parse(jsonString) + } match { + case Success(_) => None // Valid + case Failure(ex) => + Some( + ElasticError( + message = s"Invalid JSON: ${ex.getMessage}", + cause = Some(ex), + statusCode = Some(400), + operation = Some(operation) + ) + ) + } + } + + /** Validate the JSON settings. + * @param settings + * settings in JSON format + * @return + * Some(ElasticError) if invalid, None if valid + */ + protected def validateJsonSettings(settings: String): Option[ElasticError] = { + validateJson("validateJsonSettings", settings) + } + + /** Validate the alias name. Aliases follow the same rules as indexes. + * @param alias + * alias name to validate + * @return + * Some(ElasticError) if invalid, None if valid + */ + protected def validateAliasName(alias: String): Option[ElasticError] = { + // ✅ Aliases follow the same rules as indexes + validateIndexName(alias) match { + case Some(error) => + Some( + error.copy( + operation = Some("validateAliasName") + ) + ) + case None => None + } + } + + /** Logger une erreur avec le niveau approprié selon le status code. 
*/
+  protected def logError(
+    operation: String,
+    indexStr: String,
+    error: ElasticError
+  ): Unit = {
+    error.statusCode match {
+      case Some(404) =>
+        // 404 is not necessarily an error (e.g. indexExists)
+        logger.debug(s"Operation '$operation'$indexStr: ${error.message}")
+
+      case Some(status) if status >= 500 =>
+        // Server error
+        logger.error(s"Server error during '$operation'$indexStr: ${error.message}")
+
+      case Some(status) if status >= 400 =>
+        // Client error
+        logger.warn(s"Client error during '$operation'$indexStr: ${error.message}")
+
+      case _ =>
+        logger.error(s"Operation '$operation'$indexStr failed: ${error.message}")
+    }
+  }
+
+}
diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala
new file mode 100644
index 00000000..c69b085a
--- /dev/null
+++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticConversion.scala
@@ -0,0 +1,667 @@
+/*
+ * Copyright 2025 SOFTNETWORK
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package app.softnetwork.elastic.client
+
+import com.fasterxml.jackson.databind.{DeserializationFeature, JsonNode, ObjectMapper}
+import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
+import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule}
+import org.json4s.{Extraction, Formats}
+
+import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZoneId, ZonedDateTime}
+import java.time.format.DateTimeFormatter
+import scala.util.Try
+import scala.jdk.CollectionConverters._
+
+trait ElasticConversion {
+  private[this] val mapper = new ObjectMapper() with ClassTagExtensions
+  mapper.registerModule(DefaultScalaModule)
+  mapper.registerModule(new JavaTimeModule())
+  //mapper.registerModule(new ParameterNamesModule())
+
+  // Ignore unknown properties during deserialization
+  mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
+
+  def convertTo[T](map: Map[String, Any])(implicit m: Manifest[T], formats: Formats): T = {
+    val jValue = Extraction.decompose(map)
+    jValue.extract[T]
+  }
+
+  def convertTo[T](response: ElasticResponse)(implicit
+    m: Manifest[T],
+    formats: Formats
+  ): Try[Seq[T]] = {
+    parseResponse(response).map { rows =>
+      rows.map { row =>
+        convertTo[T](row)(m, formats)
+      }
+    }
+  }
+
+  // Formatters for Elasticsearch ISO 8601 date/time strings
+  private val isoDateTimeFormatter = DateTimeFormatter.ISO_DATE_TIME
+  private val isoDateFormatter = DateTimeFormatter.ISO_DATE
+  private val isoTimeFormatter = DateTimeFormatter.ISO_TIME
+
+  /** Main entry point: parses the JSON response from Elasticsearch. Handles both single-search
+    * and multi-search (msearch/UNION ALL) responses
+    */
+  def parseResponse(
+    response: ElasticResponse
+  ): Try[Seq[Map[String, Any]]] = {
+    val json = mapper.readTree(response.results)
+    // Check if it's a multi-search response (array of responses)
+    if (json.isArray) {
+      parseMultiSearchResponse(json, response.fieldAliases,
response.aggregations) + } else { + // Single search response + parseSingleSearchResponse(json, response.fieldAliases, response.aggregations) + } + } + + /** Parse a multi-search response (array of search responses) Used for UNION ALL queries + */ + def parseMultiSearchResponse( + jsonArray: JsonNode, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] + ): Try[Seq[Map[String, Any]]] = + Try { + val responses = jsonArray.elements().asScala.toList + + // Collect all errors + val errors = responses.zipWithIndex.collect { + case (response, idx) if response.has("error") => + val errorMsg = Option(response.get("error").get("reason")) + .map(_.asText()) + .getOrElse("Unknown error") + s"Query ${idx + 1}: $errorMsg" + } + + if (errors.nonEmpty) { + throw new Exception(s"Elasticsearch errors in multi-search:\n${errors.mkString("\n")}") + } else { + // Parse each response and combine all rows + val allRows = responses.flatMap { response => + if (!response.has("error")) { + jsonToRows(response, fieldAliases, aggregations) + } else { + Seq.empty + } + } + allRows + } + } + + /** Parse a single search response + */ + def parseSingleSearchResponse( + json: JsonNode, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] + ): Try[Seq[Map[String, Any]]] = + Try { + // check if it is an error response + if (json.has("error")) { + val errorMsg = Option(json.get("error").get("reason")) + .map(_.asText()) + .getOrElse("Unknown Elasticsearch error") + throw new Exception(s"Elasticsearch error: $errorMsg") + } else { + jsonToRows(json, fieldAliases, aggregations) + } + } + + /** convert JsonNode to Rows + */ + def jsonToRows( + json: JsonNode, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] + ): Seq[Map[String, Any]] = { + val hitsNode = Option(json.path("hits").path("hits")) + .filter(_.isArray) + .map(_.elements().asScala.toList) + + val aggsNode = Option(json.path("aggregations")) + .filter(!_.isMissingNode) + + (hitsNode, aggsNode) match { + case (Some(hits), None) if hits.nonEmpty => + // Case 1 : only hits + parseSimpleHits(hits, fieldAliases) + + case (None, Some(aggs)) => + // Case 2 : only aggregations + parseAggregations(aggs, Map.empty, fieldAliases, aggregations) + + case (Some(hits), Some(aggs)) if hits.isEmpty => + // Case 3 : aggregations with no hits + parseAggregations(aggs, Map.empty, fieldAliases, aggregations) + + case (Some(hits), Some(aggs)) if hits.nonEmpty => + // Case 4 : Hits + global aggregations + val globalMetrics = extractGlobalMetrics(aggs) + hits.map { hit => + val source = extractSource(hit, fieldAliases) + val metadata = extractHitMetadata(hit) + val innerHits = extractInnerHits(hit, fieldAliases) + globalMetrics ++ source ++ metadata ++ innerHits + } + + case _ => + Seq.empty + } + } + + /** Parse simple hits (without aggregations) + */ + def parseSimpleHits( + hits: List[JsonNode], + fieldAliases: Map[String, String] + ): Seq[Map[String, Any]] = { + hits.map { hit => + val source = extractSource(hit, fieldAliases) + val metadata = extractHitMetadata(hit) + val innerHits = extractInnerHits(hit, fieldAliases) + source ++ metadata ++ innerHits + } + } + + /** Extract hit metadata (_id, _index, _score) + */ + def extractHitMetadata(hit: JsonNode): Map[String, Any] = { + Map( + "_id" -> Option(hit.get("_id")).map(_.asText()), + "_index" -> Option(hit.get("_index")).map(_.asText()), + "_score" -> Option(hit.get("_score")).map(n => + if (n.isDouble || n.isFloat) n.asDouble() else 
n.asLong().toDouble
+      ),
+      "_sort" -> Option(hit.get("sort"))
+        .filter(_.isArray)
+        .map(sortNode => sortNode.elements().asScala.map(jsonNodeToAny(_, Map.empty)).toList)
+    ).collect { case (k, Some(v)) => k -> v }
+  }
+
+  /** Extract hit _source
+    */
+  def extractSource(hit: JsonNode, fieldAliases: Map[String, String]): Map[String, Any] = {
+    Option(hit.get("_source"))
+      .filter(_.isObject)
+      .map(jsonNodeToMap(_, fieldAliases))
+      .getOrElse(Map.empty)
+  }
+
+  /** Extract inner_hits from a hit (for nested or parent-child queries) */
+  private def extractInnerHits(
+    hit: JsonNode,
+    fieldAliases: Map[String, String]
+  ): Map[String, Any] = {
+    Option(hit.get("inner_hits"))
+      .filter(_.isObject)
+      .map { innerHitsNode =>
+        innerHitsNode
+          .properties()
+          .asScala
+          .map { entry =>
+            val innerHitName = entry.getKey
+            val innerHitData = entry.getValue
+
+            // Extract the hits array from inner_hits
+            val innerHitsList = Option(innerHitData.path("hits").path("hits"))
+              .filter(_.isArray)
+              .map { hitsArray =>
+                hitsArray
+                  .elements()
+                  .asScala
+                  .map { innerHit =>
+                    // Extract source and metadata for each inner hit
+                    val source = extractSource(innerHit, fieldAliases)
+                    val metadata = extractHitMetadata(innerHit)
+
+                    // Recursively handle nested inner_hits if present
+                    val nestedInnerHits = extractInnerHits(innerHit, fieldAliases)
+
+                    source ++ metadata ++ nestedInnerHits
+                  }
+                  .toList
+              }
+              .getOrElse(List.empty)
+
+            innerHitName -> innerHitsList
+          }
+          .toMap
+      }
+      .getOrElse(Map.empty)
+  }
+
+  /** Recursively parse aggregations from the Elasticsearch response with the parent context
+    */
+  def parseAggregations(
+    aggsNode: JsonNode,
+    parentContext: Map[String, Any],
+    fieldAliases: Map[String, String],
+    aggregations: Map[String, ClientAggregation]
+  ): Seq[Map[String, Any]] = {
+
+    if (aggsNode.isMissingNode || !aggsNode.isObject) {
+      return Seq.empty
+    }
+
+    // Find all buckets
+    val bucketAggs = aggsNode
+      .properties()
+      .asScala
+      .flatMap { entry =>
+        val aggName = normalizeAggregationKey(entry.getKey)
+        val aggValue = entry.getValue
+
+        // Detect aggregations with buckets
+        Option(aggValue.get("buckets"))
+          .filter(n => n.isArray || n.isObject)
+          .map { buckets =>
+            val bucketsList = if (buckets.isArray) {
+              buckets.elements().asScala.toList
+            } else {
+              // Named buckets (filters aggregation)
+              buckets
+                .properties()
+                .asScala
+                .map { bucketEntry =>
+                  val bucketNode = mapper.createObjectNode()
+                  bucketNode.put("key", bucketEntry.getKey)
+                  bucketNode.setAll(
+                    bucketEntry.getValue
+                      .asInstanceOf[com.fasterxml.jackson.databind.node.ObjectNode]
+                  )
+                  bucketNode
+                }
+                .toList
+            }
+            (aggName, bucketsList, aggValue)
+          }
+      }
+      .toList
+
+    val wrapperAggs = aggsNode
+      .properties()
+      .asScala
+      .filter { entry =>
+        val aggValue = entry.getValue
+        // These aggregations have a doc_count but no buckets, and contain sub-aggregations
+        aggValue.has("doc_count") &&
+        !aggValue.has("buckets") &&
+        !aggValue.has("value") &&
+        hasSubAggregations(aggValue)
+      }
+      .toList
+
+    if (wrapperAggs.nonEmpty) {
+      // Process wrapper aggregations
+      wrapperAggs.flatMap { entry =>
+        val aggName = normalizeAggregationKey(entry.getKey)
+        val aggValue = entry.getValue
+        val docCount = Option(aggValue.get("doc_count"))
+          .map(_.asLong())
+          .getOrElse(0L)
+
+        // Add the doc_count to the context if necessary
+        val currentContext = if (docCount > 0) {
+          parentContext + (s"${aggName}_doc_count" -> docCount)
+        } else {
+          parentContext
+        }
+
+        // Extract subaggregations (excluding doc_count)
+        val subAggsNode =
mapper.createObjectNode() + + val subAggFields = aggValue + .properties() + .asScala + .filterNot { subEntry => + Set("doc_count", "doc_count_error_upper_bound", "sum_other_doc_count") + .contains(subEntry.getKey) + } + .toList + + // Use a Java iterator to avoid casting problems + val jSubAggFields = subAggFields.asJava.iterator() + while (jSubAggFields.hasNext) { + val subEntry = jSubAggFields.next() + subAggsNode.set(subEntry.getKey, subEntry.getValue) + } + + // Recursively parse subaggregations + parseAggregations(subAggsNode, currentContext, fieldAliases, aggregations) + } + } else if (bucketAggs.isEmpty) { + // No buckets : it is a leaf aggregation (metrics or top_hits) + val metrics = extractMetrics(aggsNode) + val allTopHits = extractAllTopHits(aggsNode) + + if (allTopHits.nonEmpty) { + // Process each top_hits aggregation with their names + val topHitsData = allTopHits.map { case (topHitName, hits) => + // Determine if it is a multivalued aggregation (array_agg, ...) + val hasMultipleValues = aggregations.get(topHitName) match { + case Some(agg) => agg.multivalued + case None => + // Fallback on naming convention if aggregation is not found + !topHitName.toLowerCase.matches("(first|last)_.*") + } + + val processedHits = hits.map { hit => + val source = extractSource(hit, fieldAliases) + if (hasMultipleValues) { + source.size match { + case 0 => null + case 1 => + // If only one field in source and multivalued, return the value directly + val value = source.head._2 + value match { + case list: List[_] => list + case map: Map[_, _] => map + case other => other + } + case _ => + // Multiple fields: return as object + val metadata = extractHitMetadata(hit) + val innerHits = extractInnerHits(hit, fieldAliases) + source ++ metadata ++ innerHits + } + } else { + val metadata = extractHitMetadata(hit) + val innerHits = extractInnerHits(hit, fieldAliases) + source ++ metadata ++ innerHits + } + } + + // If multipleValues = true OR more than one hit, return a list + // If multipleValues = false AND only one hit, return an object + topHitName -> { + if (!hasMultipleValues && processedHits.size == 1) + processedHits.head + else { + if (aggregations.get(topHitName).exists(_.distinct)) + processedHits.distinct + else + processedHits + } + } + } + + Seq(parentContext ++ metrics ++ topHitsData) + + } else if (metrics.nonEmpty || parentContext.nonEmpty) { + Seq(parentContext ++ metrics) + } else { + Seq.empty + } + } else { + // Handle each aggregation with buckets + bucketAggs.flatMap { case (aggName, buckets, aggValue) => + buckets.flatMap { bucket => + val bucketKey = extractBucketKey(bucket) + val docCount = Option(bucket.get("doc_count")) + .map(_.asLong()) + .getOrElse(0L) + + val currentContext = parentContext ++ Map( + aggName -> bucketKey, + s"${aggName}_doc_count" -> docCount + ) + + // Check for sub-aggregations + val subAggFields = bucket + .properties() + .asScala + .filterNot { entry => + Set("key", "key_as_string", "doc_count", "from", "to").contains(entry.getKey) + } + .toList + + if (subAggFields.nonEmpty) { + val subAggsNode = mapper.createObjectNode() + val jsubAggFields = subAggFields.asJava.iterator() + while (jsubAggFields.hasNext) { + val entry = jsubAggFields.next() + subAggsNode.set(entry.getKey, entry.getValue) + } + /*subAggFields.foreach { entry => + subAggsNode.set(entry.getKey, entry.getValue) // FIXME + }*/ + parseAggregations(subAggsNode, currentContext, fieldAliases, aggregations) + } else { + Seq(currentContext) + } + } + } + } + } + + /** Extract the bucket key 
with proper typing (String, Long, Double, DateTime, etc.)
+    */
+  def extractBucketKey(bucket: JsonNode): Any = {
+    // Prefer key_as_string for dates
+    val keyAsString = Option(bucket.get("key_as_string"))
+      .map(_.asText())
+
+    keyAsString
+      .map { strValue =>
+        // Try to parse as an ISO 8601 date
+        tryParseAsDateTime(strValue).getOrElse(strValue)
+      }
+      .orElse {
+        Option(bucket.get("key")).map { keyNode =>
+          if (keyNode.isTextual) {
+            val text = keyNode.asText()
+            tryParseAsDateTime(text).getOrElse(text)
+          } else if (keyNode.isIntegralNumber) {
+            val longValue = keyNode.asLong()
+            // If it looks like a timestamp in milliseconds, convert to Instant
+            if (longValue > 1000000000000L && longValue < 9999999999999L) {
+              Instant.ofEpochMilli(longValue).atZone(ZoneId.of("UTC"))
+            } else {
+              longValue
+            }
+          } else if (keyNode.isFloatingPointNumber) {
+            keyNode.asDouble()
+          } else if (keyNode.isBoolean) {
+            keyNode.asBoolean()
+          } else {
+            keyNode.asText()
+          }
+        }
+      }
+      .getOrElse("")
+  }
+
+  /** Helper method to check if a node contains sub-aggregations */
+  private def hasSubAggregations(node: JsonNode): Boolean = {
+    node.properties().asScala.exists { entry =>
+      val key = entry.getKey
+      val value = entry.getValue
+      // A sub-aggregation is an object that is not a metadata field
+      !Set("doc_count", "doc_count_error_upper_bound", "sum_other_doc_count", "bg_count", "score")
+        .contains(key) && value.isObject
+    }
+  }
+
+  /** Try to parse a string as ZonedDateTime, LocalDateTime, LocalDate or LocalTime
+    */
+  def tryParseAsDateTime(text: String): Option[Any] = {
+    Try(ZonedDateTime.parse(text, isoDateTimeFormatter)).toOption
+      .orElse(Try(LocalDateTime.parse(text, isoDateTimeFormatter)).toOption)
+      .orElse(Try(LocalDate.parse(text, isoDateFormatter)).toOption)
+      .orElse(Try(LocalTime.parse(text, isoTimeFormatter)).toOption)
+  }
+
+  /** Extract metrics from an aggregation node
+    */
+  def extractMetrics(aggsNode: JsonNode): Map[String, Any] = {
+    if (!aggsNode.isObject) return Map.empty
+    aggsNode
+      .properties()
+      .asScala
+      .flatMap { entry =>
+        val name = normalizeAggregationKey(entry.getKey)
+        val value = entry.getValue
+
+        // Detect simple metric values
+        Option(value.get("value"))
+          .filter(!_.isNull)
+          .map { metricValue =>
+            val numericValue = if (metricValue.isIntegralNumber) {
+              metricValue.asLong()
+            } else if (metricValue.isFloatingPointNumber) {
+              metricValue.asDouble()
+            } else {
+              metricValue.asText()
+            }
+            name -> numericValue
+          }
+          .orElse {
+            // Stats aggregations
+            if (value.has("count") && value.has("sum") && value.has("avg")) {
+              Some(
+                name -> Map(
+                  "count" -> value.get("count").asLong(),
+                  "sum" -> Option(value.get("sum")).filterNot(_.isNull).map(_.asDouble()),
+                  "avg" -> Option(value.get("avg")).filterNot(_.isNull).map(_.asDouble()),
+                  "min" -> Option(value.get("min")).filterNot(_.isNull).map(_.asDouble()),
+                  "max" -> Option(value.get("max")).filterNot(_.isNull).map(_.asDouble())
+                ).collect { case (k, Some(v)) => k -> v; case (k, v: Long) => k -> v }
+              )
+            } else {
+              None
+            }
+          }
+          .orElse {
+            // Percentiles
+            if (value.has("values") && value.get("values").isObject) {
+              val percentiles = value
+                .get("values")
+                .properties()
+                .asScala
+                .map { pEntry =>
+                  pEntry.getKey -> pEntry.getValue.asDouble()
+                }
+                .toMap
+              Some(name -> percentiles)
+            } else {
+              None
+            }
+          }
+      }
+      .toMap
+  }
+
+  /** Extract all top_hits aggregations with their names and hits */
+  def extractAllTopHits(aggsNode: JsonNode): Map[String, Seq[JsonNode]] = {
+    if (!aggsNode.isObject) return
Map.empty + aggsNode + .properties() + .asScala + .collect { + case entry if entry.getValue.has("hits") => + val normalizedKey = normalizeAggregationKey(entry.getKey) + val hitsNode = entry.getValue.path("hits").path("hits") + val hits = if (hitsNode.isArray) { + hitsNode.elements().asScala.toSeq + } else { + Seq.empty + } + normalizedKey -> hits + } + .toMap + } + + /** Extract global metrics from aggregations (for hits + aggs case) + */ + def extractGlobalMetrics(aggsNode: JsonNode): Map[String, Any] = { + if (!aggsNode.isObject) return Map.empty + aggsNode + .properties() + .asScala + .flatMap { entry => + val name = entry.getKey + val value = entry.getValue + if (!value.has("buckets") && value.has("value")) { + val metricValue = value.get("value") + if (!metricValue.isNull) { + val numericValue = if (metricValue.isIntegralNumber) { + metricValue.asLong() + } else if (metricValue.isFloatingPointNumber) { + metricValue.asDouble() + } else { + metricValue.asText() + } + Some(name -> numericValue) + } else { + None + } + } else { + None + } + } + .toMap + } + + /** Convert recursively a JsonNode to Map + */ + def jsonNodeToMap(node: JsonNode, fieldAliases: Map[String, String]): Map[String, Any] = { + if (!node.isObject) return Map.empty + node + .properties() + .asScala + .map { entry => + val name = entry.getKey + fieldAliases.getOrElse(name, name) -> jsonNodeToAny(entry.getValue, fieldAliases) + } + .toMap + } + + /** Convert a JsonNode to Any (primitive types, List, Map) + */ + def jsonNodeToAny(node: JsonNode, fieldAliases: Map[String, String]): Any = { + if (node == null || node.isNull) null + else if (node.isBoolean) node.booleanValue() + else if (node.isNumber) node.numberValue() + else if (node.isTextual) { + val text = node.asText() + // Try to parse as date/time + tryParseAsDateTime(text).getOrElse(text) + } else if (node.isArray) { + node.elements().asScala.map(jsonNodeToAny(_, fieldAliases)).toList + } else if (node.isObject) { + jsonNodeToMap(node, fieldAliases) + } else { + node.asText() + } + } + + /** Normalize aggregation key by removing ES type prefix Examples: "cardinality#c" -> "c" + * "terms#category" -> "category" "c" -> "c" (unchanged) + */ + private[this] def normalizeAggregationKey(key: String): String = { + key.split('#') match { + case Array(_, suffix) if suffix.nonEmpty => suffix + case _ => key + } + } +} + +object ElasticConversion extends ElasticConversion with SerializationApi diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ElasticsearchVersion.scala b/core/src/main/scala/app/softnetwork/elastic/client/ElasticsearchVersion.scala new file mode 100644 index 00000000..49fb2a41 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/ElasticsearchVersion.scala @@ -0,0 +1,72 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client + +/** Elasticsearch version comparison utilities + */ +object ElasticsearchVersion { + + /** Parse Elasticsearch version string (e.g., "7.10.2", "8.11.0") + * @return + * (major, minor, patch) + */ + def parse(versionString: String): (Int, Int, Int) = { + try { + val parts = versionString.split('.').take(3) + val major = parts.headOption.map(_.toInt).getOrElse(0) + val minor = parts.lift(1).map(_.toInt).getOrElse(0) + val patch = parts.lift(2).map(_.toInt).getOrElse(0) + (major, minor, patch) + } catch { + case _: NumberFormatException => + throw new IllegalArgumentException(s"Invalid version format: $versionString") + } + } + + /** Check if version is >= target version + */ + def isAtLeast( + version: String, + targetMajor: Int, + targetMinor: Int = 0, + targetPatch: Int = 0 + ): Boolean = { + val (major, minor, patch) = parse(version) + + if (major > targetMajor) true + else if (major < targetMajor) false + else { // major == targetMajor + if (minor > targetMinor) true + else if (minor < targetMinor) false + else { // minor == targetMinor + patch >= targetPatch + } + } + } + + /** Check if PIT is supported (ES >= 7.10) + */ + def supportsPit(version: String): Boolean = { + isAtLeast(version, 7, 10) + } + + /** Check if version is ES 8+ + */ + def isEs8OrHigher(version: String): Boolean = { + isAtLeast(version, 8, 0) + } +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/FlushApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/FlushApi.scala new file mode 100644 index 00000000..67755b3c --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/FlushApi.scala @@ -0,0 +1,68 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticResult, ElasticSuccess} + +trait FlushApi extends ElasticClientHelpers { + + /** Flush the index to make sure all operations are written to disk. 
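+    *
+    * A minimal usage sketch (hypothetical index name; force and wait default to true):
+    * {{{
+    * flush("my-index") match {
+    *   case ElasticSuccess(true) => println("Index flushed")
+    *   case ElasticSuccess(false) => println("Index not flushed")
+    *   case ElasticFailure(error) => println(s"Error: ${error.message}")
+    * }
+    * }}}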
+    * @param index
+    *   - the name of the index to flush
+    * @param force
+    *   - true to force the flush, false otherwise
+    * @param wait
+    *   - true to wait for the flush to complete, false otherwise
+    * @return
+    *   true if the index was flushed successfully, false otherwise
+    */
+  def flush(index: String, force: Boolean = true, wait: Boolean = true): ElasticResult[Boolean] = {
+    validateIndexName(index) match {
+      case Some(error) =>
+        return ElasticResult.failure(
+          error.copy(
+            message = s"Invalid index: ${error.message}",
+            statusCode = Some(400),
+            index = Some(index),
+            operation = Some("flush")
+          )
+        )
+      case None => // continue
+    }
+
+    logger.debug(s"Flushing index: $index")
+
+    executeFlush(index, force, wait) match {
+      case success @ ElasticSuccess(true) =>
+        logger.info(s"✅ Index '$index' flushed successfully")
+        success
+      case success @ ElasticSuccess(_) =>
+        logger.info(s"✅ Index '$index' not flushed")
+        success
+      case failure @ ElasticFailure(error) =>
+        logger.error(s"❌ Failed to flush index '$index': ${error.message}")
+        failure
+    }
+
+  }
+
+  private[client] def executeFlush(
+    index: String,
+    force: Boolean,
+    wait: Boolean
+  ): ElasticResult[Boolean]
+}
diff --git a/core/src/main/scala/app/softnetwork/elastic/client/GetApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/GetApi.scala
new file mode 100644
index 00000000..745db1e6
--- /dev/null
+++ b/core/src/main/scala/app/softnetwork/elastic/client/GetApi.scala
@@ -0,0 +1,279 @@
+/*
+ * Copyright 2025 SOFTNETWORK
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package app.softnetwork.elastic.client
+
+import app.softnetwork.elastic.client.result.{
+  ElasticError,
+  ElasticFailure,
+  ElasticResult,
+  ElasticSuccess
+}
+import org.json4s.Formats
+
+import scala.concurrent.{ExecutionContext, Future, Promise}
+
+trait GetApi extends ElasticClientHelpers { _: SerializationApi =>
+
+  // ========================================================================
+  // PUBLIC METHODS
+  // ========================================================================
+
+  /** Check if a document exists by its id in the given index.
+    * @param id
+    *   - the id of the document to check
+    * @param index
+    *   - the name of the index to check the document in
+    * @return
+    *   true if the document exists, false otherwise
+    */
+  def exists(id: String, index: String): ElasticResult[Boolean] = {
+    get(id, index) match {
+      case ElasticSuccess(maybeDoc) => ElasticResult.success(maybeDoc.isDefined)
+      // `get` reports a missing document as a 404 failure: map it back to `false`
+      case ElasticFailure(error) if error.statusCode.contains(404) =>
+        ElasticResult.success(false)
+      case ElasticFailure(error) => ElasticResult.failure(error)
+    }
+  }
+
+  /** Get a document by its id from the given index.
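+    *
+    * A usage sketch (`client` stands for any concrete implementation of this trait):
+    *
+    * {{{
+    * client.get("42", "users") match {
+    *   case ElasticSuccess(Some(json)) => println(json)
+    *   case ElasticSuccess(None)       => // not reached: a missing document is reported as a 404 failure
+    *   case ElasticFailure(error)      => logger.warn(error.message)
+    * }
+    * }}}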
+ * @param id + * - the id of the document to get + * @param index + * - the name of the index to get the document from + * @return + * an Option containing the document as a JSON string if it was found, None otherwise + */ + def get(id: String, index: String): ElasticResult[Option[String]] = { + validateIndexName(index) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid index: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("get") + ) + ) + case None => // continue + } + + logger.debug(s"Getting document with id '$id' from index '$index'") + + executeGet(index, id) match { + case success @ ElasticSuccess(Some(_)) => + logger.info(s"✅ Successfully retrieved document with id '$id' from index '$index'") + success + case _ @ElasticSuccess(None) => + val error = + ElasticError( + message = s"Document with id '$id' not found in index '$index'", + statusCode = Some(404), + index = Some(index), + operation = Some("get") + ) + logger.error(s"❌ ${error.message}") + ElasticResult.failure(error) + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to retrieve document with id '$id' from index '$index': ${error.message}" + ) + failure + } + } + + /** Get an entity by its id from the given index. + * @param id + * - the id of the entity to get + * @param index + * - the name of the index to get the entity from (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * an Option containing the entity if it was found, None otherwise + */ + def getAs[U <: AnyRef]( + id: String, + index: Option[String] = None, + maybeType: Option[String] = None + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Option[U]] = { + val indexType = maybeType.getOrElse(m.runtimeClass.getSimpleName.toLowerCase) + val indexName = index.getOrElse(indexType) + get(id, indexName).flatMap { + case Some(jsonString) => + ElasticResult.attempt { + serialization.read[U](jsonString)(formats, m) + } match { + case ElasticSuccess(entity) => + logger.info(s"✅ Successfully retrieved document with id '$id' from index '$indexName'") + ElasticSuccess(Some(entity)) + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to retrieve document with id '$id' from index '$indexName': ${error.message}" + ) + failure + } + case None => + val error = + ElasticError( + message = s"Document with id '$id' not found in index '$indexName'", + statusCode = Some(404), + index = Some(indexName), + operation = Some("get") + ) + logger.error(s"❌ ${error.message}") + ElasticResult.failure(error) + } + } + + /** Get a document by its id from the given index asynchronously. 
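+    *
+    * A sketch of asynchronous usage (assumes an implicit `ExecutionContext` in scope):
+    *
+    * {{{
+    * client.getAsync("42", "users").map {
+    *   case ElasticSuccess(maybeDoc) => maybeDoc.foreach(println)
+    *   case ElasticFailure(error)    => logger.warn(error.message)
+    * }
+    * }}}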
+ * @param id + * - the id of the document to get + * @param index + * - the name of the index to get the document from + * @return + * a Future that completes with an Option containing the document as a JSON string if it was + * found, None otherwise + */ + def getAsync( + id: String, + index: String + )(implicit ec: ExecutionContext): Future[ElasticResult[Option[String]]] = { + validateIndexName(index) match { + case Some(error) => + return Future.successful( + ElasticResult.failure( + error.copy( + message = s"Invalid index: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("getAsync") + ) + ) + ) + case None => // continue + } + + logger.debug(s"Getting document with id '$id' from index '$index' asynchronously") + + val promise: Promise[ElasticResult[Option[String]]] = Promise() + executeGetAsync(index, id) onComplete { + case scala.util.Success(result) => + result match { + case success @ ElasticSuccess(Some(_)) => + logger.info(s"✅ Successfully retrieved document with id '$id' from index '$index'") + promise.success(success) + case _ @ElasticSuccess(None) => + val error = + ElasticError( + message = s"Document with id '$id' not found in index '$index'", + statusCode = Some(404), + index = Some(index), + operation = Some("getAsync") + ) + logger.error(s"❌ ${error.message}") + promise.success(ElasticResult.failure(error)) + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to retrieve document with id '$id' from index '$index': ${error.message}" + ) + promise.success(failure) + } + case scala.util.Failure(exception) => + val error = + ElasticError( + message = + s"Exception occurred while retrieving document with id '$id' from index '$index': ${exception.getMessage}", + statusCode = Some(500), + index = Some(index), + operation = Some("getAsync") + ) + logger.error(s"❌ ${error.message}") + promise.success(ElasticResult.failure(error)) + } + + promise.future + + } + + /** Get an entity by its id from the given index asynchronously. 
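+    *
+    * For example (the `User` case class and the json4s `Formats` instance below are
+    * illustrative assumptions):
+    *
+    * {{{
+    * implicit val formats: Formats = org.json4s.DefaultFormats
+    * case class User(name: String)
+    *
+    * client.getAsyncAs[User]("42", Some("users")).map {
+    *   case ElasticSuccess(maybeUser) => maybeUser.foreach(u => println(u.name))
+    *   case ElasticFailure(error)     => logger.warn(error.message)
+    * }
+    * }}}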
+ * @param id + * - the id of the entity to get + * @param index + * - the name of the index to get the entity from (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * a Future that completes with an Option containing the entity if it was found, None otherwise + */ + def getAsyncAs[U <: AnyRef]( + id: String, + index: Option[String] = None, + maybeType: Option[String] = None + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Option[U]]] = { + val indexType = maybeType.getOrElse(m.runtimeClass.getSimpleName.toLowerCase) + val indexName = index.getOrElse(indexType) + getAsync(id, indexName).flatMap { + case ElasticSuccess(Some(jsonString)) => + ElasticResult + .attempt { + serialization.read[U](jsonString)(formats, m) + } + .map { entity => + logger.info(s"✅ Successfully retrieved document with id '$id' from index '$indexName'") + Some(entity) + } match { + case success @ ElasticSuccess(_) => Future.successful(success) + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to retrieve document with id '$id' from index '$indexName': ${error.message}" + ) + Future.successful(failure) + } + case ElasticSuccess(None) => + val error = + ElasticError( + message = s"Document with id '$id' not found in index '$indexName'", + statusCode = Some(404), + index = Some(indexName), + operation = Some("getAsyncAs") + ) + logger.error(s"❌ ${error.message}") + Future.successful(ElasticResult.failure(error)) + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to retrieve document with id '$id' from index '$indexName': ${error.message}" + ) + Future.successful(failure) + } + } + + // ======================================================================== + // METHODS TO IMPLEMENT + // ======================================================================== + + private[client] def executeGet( + index: String, + id: String + ): ElasticResult[Option[String]] + + private[client] def executeGetAsync( + index: String, + id: String + )(implicit ec: ExecutionContext): Future[ElasticResult[Option[String]]] +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/IndexApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/IndexApi.scala new file mode 100644 index 00000000..3a954a20 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/IndexApi.scala @@ -0,0 +1,217 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.{ + ElasticError, + ElasticFailure, + ElasticResult, + ElasticSuccess +} +import org.json4s.Formats + +import scala.concurrent.{ExecutionContext, Future, Promise} +import scala.reflect.ClassTag + +/** Index Management API + */ +trait IndexApi extends ElasticClientHelpers { _: RefreshApi with SerializationApi => + + // ======================================================================== + // PUBLIC METHODS + // ======================================================================== + + /** Index an entity in the given index. + * @param entity + * - the entity to index + * @param id + * - the id of the entity to index + * @param index + * - the name of the index to index the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * true if the entity was indexed successfully, false otherwise + */ + def indexAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String] = None, + maybeType: Option[String] = None + )(implicit u: ClassTag[U], formats: Formats): ElasticResult[Boolean] = { + val indexType = maybeType.getOrElse(u.runtimeClass.getSimpleName.toLowerCase) + val indexName = index.getOrElse(indexType) + + ElasticResult + .attempt { + serialization.write[U](entity) + } + .flatMap { source => + this.index(indexName, id, source) + } + } + + /** Index an entity in the given index. + * @param index + * - the name of the index to index the entity in + * @param id + * - the id of the entity to index + * @param source + * - the source of the entity to index in JSON format + * @return + * true if the entity was indexed successfully, false otherwise + */ + def index(index: String, id: String, source: String): ElasticResult[Boolean] = { + validateIndexName(index) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid index: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("index") + ) + ) + case None => // continue + } + + logger.debug(s"Indexing document with id '$id' in index '$index'") + + executeIndex(index, id, source) match { + case success @ ElasticSuccess(true) => + logger.info(s"✅ Document with id '$id' indexed successfully in index '$index'") + // Refresh the index to make sure the document is available for search + this.refresh(index) + success + case success @ ElasticSuccess(_) => + logger.info(s"✅ Document with id '$id' not indexed in index '$index'") + success + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to index document with id '$id' in index '$index': ${error.message}" + ) + failure + } + } + + /** Index an entity in the given index asynchronously. 
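+    *
+    * A sketch (the `User` case class is an illustrative assumption; an implicit
+    * `ExecutionContext` and json4s `Formats` must be in scope):
+    *
+    * {{{
+    * client.indexAsyncAs(User("Alice"), id = "42", index = Some("users")).map {
+    *   case ElasticSuccess(indexed) => logger.info(s"indexed: $indexed")
+    *   case ElasticFailure(error)   => logger.error(error.message)
+    * }
+    * }}}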
+ * @param entity + * - the entity to index + * @param id + * - the id of the entity to index + * @param index + * - the name of the index to index the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * a Future that completes with true if the entity was indexed successfully, false otherwise + */ + def indexAsyncAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String] = None, + maybeType: Option[String] = None + )(implicit + u: ClassTag[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Boolean]] = { + Future { + this.indexAs(entity, id, index, maybeType) + } + } + + /** Index an entity in the given index asynchronously. + * @param index + * - the name of the index to index the entity in + * @param id + * - the id of the entity to index + * @param source + * - the source of the entity to index in JSON format + * @return + * a Future that completes with true if the entity was indexed successfully, false otherwise + */ + def indexAsync(index: String, id: String, source: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = { + validateIndexName(index) match { + case Some(error) => + return Future.successful( + ElasticResult.failure( + error.copy( + message = s"Invalid index: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("indexAsync") + ) + ) + ) + case None => // continue + } + + logger.debug(s"Indexing asynchronously document with id '$id' in index '$index'") + + val promise: Promise[ElasticResult[Boolean]] = Promise() + + executeIndexAsync(index, id, source) onComplete { + case scala.util.Success(result) => + result match { + case success @ ElasticSuccess(true) => + logger.info(s"✅ Successfully indexed document with id '$id' in index '$index'") + // Refresh the index to make sure the document is available for search + this.refresh(index) + promise.success(success) + case success @ ElasticSuccess(_) => + logger.info(s"✅ Document with id '$id' not indexed in index '$index'") + promise.success(success) + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to index document with id '$id' in index '$index': ${error.message}" + ) + promise.success(failure) + } + case scala.util.Failure(exception) => + val error = ElasticError( + message = + s"Failed to index document with id '$id' in index '$index': ${exception.getMessage}", + operation = Some("indexAsync"), + index = Some(index), + cause = Some(exception) + ) + logger.error(s"❌ ${error.message}") + promise.success(ElasticResult.failure(error)) + } + + promise.future + } + + // ======================================================================== + // METHODS TO IMPLEMENT + // ======================================================================== + + private[client] def executeIndex( + index: String, + id: String, + source: String + ): ElasticResult[Boolean] + + private[client] def executeIndexAsync( + index: String, + id: String, + source: String + )(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/IndicesApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/IndicesApi.scala new file mode 100644 index 00000000..6bb7e8f1 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/IndicesApi.scala @@ -0,0 +1,414 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * 
you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result._ + +/** Index management API. + * + * This implementation provides: + * - Robust error handling with [[ElasticResult]] + * - Detailed logging for debugging + * - Parameter validation + * - Automatic retry for transient errors + */ +trait IndicesApi extends ElasticClientHelpers { _: RefreshApi => + + // ======================================================================== + // PUBLIC METHODS + // ======================================================================== + + /** Default settings for indices. This is used when creating an index without providing specific + * settings. It includes ngram tokenizer and analyzer, as well as some default limits. + */ + val defaultSettings: String = + """ + |{ + | "index": { + | "max_ngram_diff": "20", + | "mapping" : { + | "total_fields" : { + | "limit" : "2000" + | } + | }, + | "analysis": { + | "analyzer": { + | "ngram_analyzer": { + | "tokenizer": "ngram_tokenizer", + | "filter": [ + | "lowercase", + | "asciifolding" + | ] + | }, + | "search_analyzer": { + | "type": "custom", + | "tokenizer": "standard", + | "filter": [ + | "lowercase", + | "asciifolding" + | ] + | } + | }, + | "tokenizer": { + | "ngram_tokenizer": { + | "type": "ngram", + | "min_gram": 1, + | "max_gram": 20, + | "token_chars": [ + | "letter", + | "digit" + | ] + | } + | } + | } + | } + |} + """.stripMargin + + /** Create an index with the provided name and settings. + * @param index + * - the name of the index to create + * @param settings + * - the settings to apply to the index (default is defaultSettings) + * @return + * true if the index was created successfully, false otherwise + */ + def createIndex(index: String, settings: String = defaultSettings): ElasticResult[Boolean] = { + validateIndexName(index) match { + case Some(error) => + return ElasticFailure( + error.copy( + operation = Some("createIndex"), + statusCode = Some(400), + index = Some(index), + message = s"Invalid index: ${error.message}" + ) + ) + case None => // OK + } + + validateJsonSettings(settings) match { + case Some(error) => + return ElasticFailure( + error.copy( + operation = Some("createIndex"), + statusCode = Some(400), + message = s"Invalid settings: ${error.message}" + ) + ) + case None => // OK + } + + logger.info(s"Creating index '$index' with settings: $settings") + + executeCreateIndex(index, settings) match { + case success @ ElasticSuccess(true) => + logger.info(s"✅ Index '$index' created successfully") + success + case success @ ElasticSuccess(_) => + logger.info(s"✅ Index '$index' not created") + success + case failure @ ElasticFailure(error) => + logger.error(s"❌ Failed to create index '$index': ${error.message}") + failure + } + } + + /** Delete an index with the provided name. 
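+    *
+    * For example (a sketch; `client` is any concrete implementation):
+    *
+    * {{{
+    * client.deleteIndex("users_v1") match {
+    *   case ElasticSuccess(deleted) => logger.info(s"deleted: $deleted")
+    *   case ElasticFailure(error)   => logger.error(error.message)
+    * }
+    * }}}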
+    * @param index
+    *   - the name of the index to delete
+    * @return
+    *   true if the index was deleted successfully, false otherwise
+    */
+  def deleteIndex(index: String): ElasticResult[Boolean] = {
+    validateIndexName(index) match {
+      case Some(error) =>
+        return ElasticFailure(
+          error.copy(
+            operation = Some("deleteIndex"),
+            statusCode = Some(400),
+            index = Some(index),
+            message = s"Invalid index: ${error.message}"
+          )
+        )
+      case None => // OK
+    }
+
+    logger.info(s"Deleting index '$index'")
+
+    executeDeleteIndex(index) match {
+      case success @ ElasticSuccess(true) =>
+        logger.info(s"✅ Index '$index' deleted successfully")
+        success
+      case success @ ElasticSuccess(_) =>
+        logger.info(s"✅ Index '$index' not deleted")
+        success
+      case failure @ ElasticFailure(error) =>
+        logger.error(s"❌ Failed to delete index '$index': ${error.message}")
+        failure
+    }
+  }
+
+  /** Close an index with the provided name.
+    * @param index
+    *   - the name of the index to close
+    * @return
+    *   true if the index was closed successfully, false otherwise
+    */
+  def closeIndex(index: String): ElasticResult[Boolean] = {
+    validateIndexName(index) match {
+      case Some(error) =>
+        return ElasticFailure(
+          error.copy(
+            operation = Some("closeIndex"),
+            statusCode = Some(400),
+            index = Some(index),
+            message = s"Invalid index: ${error.message}"
+          )
+        )
+      case None => // OK
+    }
+
+    logger.info(s"Closing index '$index'")
+
+    executeCloseIndex(index) match {
+      case success @ ElasticSuccess(true) =>
+        logger.info(s"✅ Index '$index' closed successfully")
+        success
+      case success @ ElasticSuccess(_) =>
+        logger.info(s"✅ Index '$index' not closed")
+        success
+      case failure @ ElasticFailure(error) =>
+        logger.error(s"❌ Failed to close index '$index': ${error.message}")
+        failure
+    }
+  }
+
+  /** Open an index with the provided name.
+    * @param index
+    *   - the name of the index to open
+    * @return
+    *   true if the index was opened successfully, false otherwise
+    */
+  def openIndex(index: String): ElasticResult[Boolean] = {
+    validateIndexName(index) match {
+      case Some(error) =>
+        return ElasticFailure(
+          error.copy(
+            operation = Some("openIndex"),
+            statusCode = Some(400),
+            index = Some(index),
+            message = s"Invalid index: ${error.message}"
+          )
+        )
+      case None => // OK
+    }
+
+    logger.info(s"Opening index '$index'")
+
+    executeOpenIndex(index) match {
+      case success @ ElasticSuccess(true) =>
+        logger.info(s"✅ Index '$index' opened successfully")
+        success
+      case success @ ElasticSuccess(_) =>
+        logger.info(s"✅ Index '$index' not opened")
+        success
+      case failure @ ElasticFailure(error) =>
+        logger.error(s"❌ Failed to open index '$index': ${error.message}")
+        failure
+    }
+  }
+
+  /** Reindex from source index to target index.
+    * @param sourceIndex
+    *   - the name of the source index
+    * @param targetIndex
+    *   - the name of the target index
+    * @param refresh
+    *   - true to refresh the target index after reindexing, false otherwise
+    * @return
+    *   true and the number of documents reindexed if the reindexing was successful, false
+    *   otherwise
+    */
+  def reindex(
+    sourceIndex: String,
+    targetIndex: String,
+    refresh: Boolean = true
+  ): ElasticResult[(Boolean, Option[Long])] = {
+    // Validation...
+    validateIndexName(sourceIndex) match {
+      case Some(error) =>
+        return ElasticFailure(
+          error.copy(
+            operation = Some("reindex"),
+            statusCode = Some(400),
+            index = Some(sourceIndex),
+            message = s"Invalid source index: ${error.message}"
+          )
+        )
+      case None => // OK
+    }
+
+    validateIndexName(targetIndex) match {
+      case Some(error) =>
+        return ElasticFailure(
+          error.copy(
+            operation = Some("reindex"),
+            statusCode = Some(400),
+            index = Some(targetIndex),
+            message = s"Invalid target index: ${error.message}"
+          )
+        )
+      case None => // OK
+    }
+
+    if (sourceIndex == targetIndex) {
+      return ElasticFailure(
+        ElasticError(
+          message = "Source and target index cannot be the same",
+          cause = None,
+          statusCode = Some(400),
+          index = Some(sourceIndex),
+          operation = Some("reindex")
+        )
+      )
+    }
+
+    logger.info(s"Reindexing from '$sourceIndex' to '$targetIndex' (refresh=$refresh)")
+
+    // Existence checks...
+    indexExists(sourceIndex) match {
+      case ElasticSuccess(false) =>
+        return ElasticFailure(
+          ElasticError(
+            message = s"Source index '$sourceIndex' does not exist",
+            cause = None,
+            statusCode = Some(404),
+            index = Some(sourceIndex),
+            operation = Some("reindex")
+          )
+        )
+      case ElasticFailure(error) => return ElasticFailure(error)
+      case _ => // OK
+    }
+
+    indexExists(targetIndex) match {
+      case ElasticSuccess(false) =>
+        return ElasticFailure(
+          ElasticError(
+            message = s"Target index '$targetIndex' does not exist",
+            cause = None,
+            statusCode = Some(404),
+            index = Some(targetIndex),
+            operation = Some("reindex")
+          )
+        )
+      case ElasticFailure(error) => return ElasticFailure(error)
+      case _ => // OK
+    }
+
+    // ✅ Perform the reindex, extracting the number of documents processed
+    executeReindex(sourceIndex, targetIndex, refresh) match {
+      case ElasticFailure(error) =>
+        logger.error(s"Reindex failed for index '$targetIndex': ${error.message}")
+        ElasticFailure(error)
+
+      case ElasticSuccess((true, docsCount)) =>
+        val countStr = docsCount.map(c => s" ($c documents)").getOrElse("")
+        logger.info(s"✅ Reindex from '$sourceIndex' to '$targetIndex' succeeded$countStr")
+
+        if (refresh) {
+          this.refresh(targetIndex) match {
+            case ElasticSuccess(_) =>
+              logger.debug(s"✅ Target index '$targetIndex' refreshed")
+              ElasticSuccess((true, docsCount))
+            case ElasticFailure(error) =>
+              logger.warn(
+                s"⚠️ Refresh failed but reindex succeeded for index '$targetIndex': ${error.message}"
+              )
+              ElasticSuccess((true, docsCount))
+          }
+        } else {
+          ElasticSuccess((true, docsCount))
+        }
+
+      case ElasticSuccess((false, _)) =>
+        ElasticFailure(
+          ElasticError(
+            message = s"Reindex failed for index '$targetIndex'",
+            cause = None,
+            statusCode = None,
+            index = Some(targetIndex),
+            operation = Some("reindex")
+          )
+        )
+    }
+  }
+
+  /** Check if an index exists.
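+    *
+    * This composes naturally with the other index operations; a sketch of a
+    * create-if-missing pattern:
+    *
+    * {{{
+    * client.indexExists("users").flatMap {
+    *   case false => client.createIndex("users")
+    *   case true  => ElasticResult.success(true)
+    * }
+    * }}}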
+ * @param index + * - the name of the index to check + * @return + * true if the index exists, false otherwise + */ + def indexExists(index: String): ElasticResult[Boolean] = { + validateIndexName(index) match { + case Some(error) => + return ElasticFailure( + error.copy( + operation = Some("indexExists"), + statusCode = Some(400), + index = Some(index), + message = s"Invalid index: ${error.message}" + ) + ) + case None => // OK + } + + logger.debug(s"Checking if index '$index' exists") + + executeIndexExists(index) match { + case success @ ElasticSuccess(exists) => + val existenceStr = if (exists) "exists" else "does not exist" + logger.debug(s"✅ Index '$index' $existenceStr") + success + case failure @ ElasticFailure(error) => + logger.error(s"❌ Failed to check existence of index '$index': ${error.message}") + failure + } + } + + // ======================================================================== + // METHODS TO IMPLEMENT + // ======================================================================== + + private[client] def executeCreateIndex(index: String, settings: String): ElasticResult[Boolean] + + private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] + + private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] + + private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] + + private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] + + private[client] def executeIndexExists(index: String): ElasticResult[Boolean] +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/MappingApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/MappingApi.scala new file mode 100644 index 00000000..d274bac1 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/MappingApi.scala @@ -0,0 +1,364 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.{ + ElasticError, + ElasticFailure, + ElasticResult, + ElasticSuccess +} +import com.google.gson.JsonParser + +import java.util.UUID + +/** Mapping management API. + */ +trait MappingApi extends ElasticClientHelpers { _: SettingsApi with IndicesApi with RefreshApi => + + // ======================================================================== + // PUBLIC METHODS + // ======================================================================== + + /** Set the mapping of an index. 
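+    *
+    * For illustration (a deliberately minimal mapping; the field name is an assumption):
+    *
+    * {{{
+    * val mapping = """{"properties": {"name": {"type": "keyword"}}}"""
+    * client.setMapping("users", mapping)
+    * }}}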
+ * @param index + * - the name of the index to set the mapping for + * @param mapping + * - the mapping to set on the index + * @return + * true if the mapping was set successfully, false otherwise + */ + def setMapping(index: String, mapping: String): ElasticResult[Boolean] = { + validateIndexName(index) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid index: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("setMapping") + ) + ) + case None => // continue + } + + validateJson("mapping", mapping) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid mapping: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("setMapping") + ) + ) + case None => // continue + } + + logger.debug(s"Setting mapping for index '$index': $mapping") + + executeSetMapping(index, mapping) match { + case success @ ElasticSuccess(true) => + logger.info(s"✅ Mapping for index '$index' updated successfully") + success + case success @ ElasticSuccess(_) => + logger.info(s"✅ Mapping for index '$index' not updated") + success + case failure @ ElasticFailure(error) => + logger.error(s"❌ Failed to update mapping for index '$index': ${error.message}") + failure + } + } + + /** Get the mapping of an index. + * @param index + * - the name of the index to get the mapping for + * @return + * the mapping of the index as a JSON string + */ + def getMapping(index: String): ElasticResult[String] = { + validateIndexName(index) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid index: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("getMapping") + ) + ) + case None => // continue + } + + logger.debug(s"Getting mapping for index '$index'") + + executeGetMapping(index) + } + + /** Get the mapping properties of an index. + * @param index + * - the name of the index to get the mapping properties for + * @return + * the mapping properties of the index as a JSON string + */ + def getMappingProperties(index: String): ElasticResult[String] = + getMapping(index) + + /** Check if the mapping of an index is different from the provided mapping. + * @param index + * - the name of the index to check + * @param mapping + * - the mapping to compare with the current mapping of the index + * @return + * true if the mapping is different, false otherwise + */ + def shouldUpdateMapping( + index: String, + mapping: String + ): ElasticResult[Boolean] = { + getMappingProperties(index).map { properties => + MappingComparator.isMappingDifferent(properties, mapping) + } + } + + /** Update the mapping of an index to a new mapping. + * + * This method handles three scenarios: + * 1. Index doesn't exist: Create it with the new mapping 2. Index exists but mapping is + * outdated: Migrate to new mapping 3. 
Index exists and mapping is current: Do nothing + * + * @param index + * - the name of the index to migrate + * @param mapping + * - the new mapping to set on the index + * @param settings + * - the settings to apply to the index (default is defaultSettings) + * @return + * true if the mapping was created or updated successfully, false otherwise + */ + def updateMapping( + index: String, + mapping: String, + settings: String = defaultSettings + ): ElasticResult[Boolean] = { + indexExists(index).flatMap { + case false => + // Scenario 1: Index doesn't exist + createIndexWithMapping(index, mapping, settings) + + case true => + // Check if mapping needs update + shouldUpdateMapping(index, mapping).flatMap { + case true => + // Scenario 2: Migrate to new mapping + logger.info(s"Mapping for index '$index' needs update. Starting migration.") + migrateMappingWithRollback(index, mapping, settings) + + case false => + // Scenario 3: Mapping is current + logger.info(s"✅ Mapping for index '$index' is already up to date") + ElasticResult.success(true) + + } + } + } + + /** Create a new index with the given mapping. + */ + private def createIndexWithMapping( + index: String, + mapping: String, + settings: String + ): ElasticResult[Boolean] = { + logger.info(s"Creating new index '$index' with mapping") + + for { + _ <- createIndex(index, settings) + .filter(_ == true, s"Failed to create index '$index'") + .logSuccess(logger, _ => s"✅ Index '$index' created successfully") + + _ <- setMapping(index, mapping) + .filter(_ == true, s"Failed to set mapping for index '$index'") + .logSuccess(logger, _ => s"✅ Mapping for index '$index' set successfully") + + } yield true + } + + private def migrateMappingWithRollback( + index: String, + newMapping: String, + settings: String + ): ElasticResult[Boolean] = { + + val tempIndex = s"${index}_tmp_${UUID.randomUUID().toString.take(8)}" + + // Backup original state + val backupResult = for { + originalMapping <- getMapping(index) + originalSettings <- loadSettings(index) + } yield (originalMapping, originalSettings) + + backupResult match { + case ElasticSuccess((origMapping, origSettings)) => + logger.info(s"✅ Backed up original mapping and settings for '$index'") + + val migrationResult = performMigration( + index, + tempIndex, + newMapping, + settings + ) + + migrationResult match { + case ElasticSuccess(true) => + logger.info(s"✅ Migration completed successfully for '$index'") + ElasticSuccess(true) + + case ElasticFailure(error) => + logger.error(s"❌ Migration failed for '$index': ${error.fullMessage}") + logger.info(s"Attempting rollback for '$index'") + + rollbackMigration(index, tempIndex, origMapping, origSettings) match { + case ElasticSuccess(_) => + logger.info(s"✅ Rollback completed successfully for '$index'") + case ElasticFailure(rollbackError) => + logger.error(s"❌ Rollback failed for '$index': ${rollbackError.fullMessage}") + } + + ElasticFailure(error) + } + + case ElasticFailure(error) => + logger.error(s"❌ Failed to backup original state for '$index': ${error.fullMessage}") + ElasticFailure(error) + } + } + + /** Migrate an existing index to a new mapping. + * + * Process: + * 1. Create temporary index with new mapping 2. Reindex data from original to temporary 3. + * Delete original index 4. Recreate original index with new mapping 5. Reindex data from + * temporary to original 6. 
Delete temporary index + */ + private def performMigration( + index: String, + tempIndex: String, + mapping: String, + settings: String + ): ElasticResult[Boolean] = { + + logger.info(s"Starting migration: $index -> $tempIndex") + + for { + // Create temp index + _ <- createIndex(tempIndex, settings) + .filter(_ == true, s"❌ Failed to create temp index '$tempIndex'") + + _ <- setMapping(tempIndex, mapping) + .filter(_ == true, s"❌ Failed to set mapping on temp index") + + // Reindex to temp + _ <- reindex(index, tempIndex, refresh = true) + .filter(_._1 == true, s"❌ Failed to reindex to temp") + + // Delete original + _ <- deleteIndex(index) + .filter(_ == true, s"❌ Failed to delete original index") + + // Recreate original with new mapping + _ <- createIndex(index, settings) + .filter(_ == true, s"❌ Failed to recreate original index") + + _ <- setMapping(index, mapping) + .filter(_ == true, s"❌ Failed to set new mapping") + + // Reindex back from temp + _ <- reindex(tempIndex, index, refresh = true) + .filter(_._1 == true, s"❌ Failed to reindex from temp") + + _ <- openIndex(index) + .filter(_ == true, s"❌ Failed to open index") + + // Cleanup temp + _ <- deleteIndex(tempIndex) + + } yield { + logger.info(s"✅ Migration completed: $index") + true + } + } + + private def rollbackMigration( + index: String, + tempIndex: String, + originalMapping: String, + originalSettings: String + ): ElasticResult[Boolean] = { + + logger.warn(s"Rolling back migration for '$index'") + + for { + // Check if temp index exists and has data + tempExists <- indexExists(tempIndex) + + // Delete current (potentially corrupted) index if it exists + _ <- indexExists(index).flatMap { + case true => deleteIndex(index) + case false => ElasticResult.success(true) + } + + // Recreate with original settings and mapping + _ <- createIndex(index, originalSettings) + .filter(_ == true, s"❌ Rollback: Failed to recreate index") + + _ <- setMapping(index, originalMapping) + .filter(_ == true, s"❌ Rollback: Failed to restore mapping") + + // If temp exists, reindex from it + _ <- + if (tempExists) { + reindex(tempIndex, index, refresh = true) + .filter(_._1 == true, s"❌ Rollback: Failed to reindex from temp") + } else { + ElasticResult.success(true) + } + + _ <- openIndex(index) + + // Cleanup temp if it exists + _ <- + if (tempExists) { + deleteIndex(tempIndex) + } else { + ElasticResult.success(true) + } + + } yield { + logger.info(s"✅ Rollback index completed for '$index'") + true + } + } + + // ======================================================================== + // METHODS TO IMPLEMENT + // ======================================================================== + + private[client] def executeSetMapping(index: String, mapping: String): ElasticResult[Boolean] + + private[client] def executeGetMapping(index: String): ElasticResult[String] +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/MappingComparator.scala b/core/src/main/scala/app/softnetwork/elastic/client/MappingComparator.scala index f9b9d1b3..badb9962 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/MappingComparator.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/MappingComparator.scala @@ -19,8 +19,7 @@ package app.softnetwork.elastic.client import com.google.gson._ import com.typesafe.scalalogging.StrictLogging -//import scala.jdk.CollectionConverters._ -import scala.collection.JavaConverters._ +import scala.jdk.CollectionConverters._ import scala.util.{Failure, Success, Try} object MappingComparator extends 
StrictLogging { diff --git a/core/src/main/scala/app/softnetwork/elastic/client/RefreshApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/RefreshApi.scala new file mode 100644 index 00000000..2ac3ee42 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/RefreshApi.scala @@ -0,0 +1,63 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticResult, ElasticSuccess} + +/** Refresh management API for Elasticsearch clients. + * @see + * [[RefreshApi]] for generic API documentation + */ +trait RefreshApi extends ElasticClientHelpers { + + /** Refresh the index to make sure all documents are indexed and searchable. + * @param index + * - the name of the index to refresh + * @return + * true if the index was refreshed successfully, false otherwise + */ + def refresh(index: String): ElasticResult[Boolean] = { + validateIndexName(index) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid index: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("refresh") + ) + ) + case None => // continue + } + + logger.debug(s"Refreshing index: $index") + + executeRefresh(index) match { + case success @ ElasticSuccess(true) => + logger.info(s"✅ Index '$index' refreshed successfully") + success + case success @ ElasticSuccess(_) => + logger.info(s"✅ Index '$index' not refreshed") + success + case failure @ ElasticFailure(error) => + logger.error(s"❌ Failed to refresh index '$index': ${error.message}") + failure + } + } + + private[client] def executeRefresh(index: String): ElasticResult[Boolean] +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala new file mode 100644 index 00000000..253b6e05 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/ScrollApi.scala @@ -0,0 +1,318 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client + +import akka.NotUsed +import akka.actor.ActorSystem +import akka.stream.scaladsl.{Sink, Source} +import app.softnetwork.elastic.client.scroll.{ + ScrollConfig, + ScrollMetrics, + ScrollStrategy, + UsePIT, + UseScroll, + UseSearchAfter +} +import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLQuery} +import org.json4s.{Formats, JNothing} +import org.json4s.jackson.JsonMethods.parse + +import scala.concurrent.{ExecutionContext, Promise} +import scala.util.{Failure, Success} + +/** API for scrolling through search results using Akka Streams. + * + * ==Error Handling== + * + * This API handles transient errors automatically in the implementation: + * - Network timeouts are retried + * - Expired scroll contexts are handled gracefully + * - Elasticsearch errors are logged and recovered + * + * Users can add additional error handling using standard Akka Streams operators: + * + * {{{ + * // Add custom recovery + * client.scroll(sqlQuery) + * .recover { + * case ex: MyException => handleError(ex) + * } + * .runWith(Sink.seq) + * + * // Add supervision strategy + * implicit val decider: Supervision.Decider = { + * case _: TransientException => Supervision.Resume + * case _ => Supervision.Stop + * } + * + * client.scroll(sqlQuery) + * .withAttributes(ActorAttributes.supervisionStrategy(decider)) + * .runWith(Sink.seq) + * }}} + * + * ==Performance== + * + * The implementation automatically selects the most efficient strategy: + * + * {{{ + * ┌─────────────────┬───────────────┬──────────────────────────────────┐ + * │ ES Version │ Aggregations │ Strategy │ + * ├─────────────────┼───────────────┼──────────────────────────────────┤ + * │ 7.10+ │ No │ PIT + search_after (recommended) │ + * │ 7.10+ │ Yes │ Classic scroll │ + * │ < 7.10 │ No │ search_after │ + * │ < 7.10 │ Yes │ Classic scroll │ + * └─────────────────┴───────────────┴──────────────────────────────────┘ + * }}} + * + * '''Point In Time (PIT) + search_after''' (ES 7.10+, no aggregations): + * - Provides a consistent snapshot of data across pagination + * - No scroll context timeout issues + * - Better resource usage and performance + * - Automatic cleanup on completion + * + * '''search_after''' (ES < 7.10, no aggregations): + * - Efficient pagination without server-side state + * - Suitable for deep pagination + * - Requires explicit sort fields + * + * '''Classic scroll''' (all versions, with aggregations): + * - Required for queries with aggregations + * - Maintains a consistent snapshot + * - Automatic cleanup of scroll contexts + * - Subject to scroll timeout (configurable via keepAlive) + * + * @note + * PIT is not supported for aggregation queries. The implementation automatically falls back to + * classic scroll when aggregations are detected. 
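+  *
+  * For example, a bounded scroll with a smaller page size (a sketch; `scrollSize` and
+  * `maxDocuments` are the [[ScrollConfig]] fields used by this implementation):
+  *
+  * {{{
+  * client.scroll(sqlQuery, ScrollConfig(scrollSize = 500, maxDocuments = Some(100000L)))
+  *   .map { case (row, _) => row }
+  *   .runWith(Sink.seq)
+  * }}}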
+ * @see + * [[ScrollConfig]] for configuration options + * @see + * [[https://www.elastic.co/guide/en/elasticsearch/reference/7.10/point-in-time-api.html PIT API Documentation]] + */ +trait ScrollApi extends ElasticClientHelpers { + _: VersionApi with SearchApi => + + // ======================================================================== + // MAIN SCROLL METHODS + // ======================================================================== + + /** Create a scrolling source with automatic strategy selection + */ + def scroll( + sql: SQLQuery, + config: ScrollConfig = ScrollConfig() + )(implicit system: ActorSystem): Source[(Map[String, Any], ScrollMetrics), NotUsed] = { + sql.request match { + case Some(Left(single)) => + val sqlRequest = single.copy(score = sql.score) + val elasticQuery = + ElasticQuery(sqlRequest, collection.immutable.Seq(sqlRequest.sources: _*)) + scrollWithMetrics( + elasticQuery, + sqlRequest.fieldAliases, + sqlRequest.sqlAggregations, + config, + single.sorts.nonEmpty + ) + + case Some(Right(_)) => + Source.failed( + new UnsupportedOperationException("Scrolling is not supported for multi-search queries") + ) + + case None => + Source.failed( + new IllegalArgumentException("SQL query does not contain a valid search request") + ) + } + } + + /** Classic scroll (works for both hits and aggregations) + */ + private[client] def scrollClassic( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation], + config: ScrollConfig + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] + + /** Search After (only for hits, more efficient) + */ + private[client] def searchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] + + private[client] def pitSearchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] + + /** Typed scroll source + */ + def scrollAs[T]( + sql: SQLQuery, + config: ScrollConfig = ScrollConfig() + )(implicit + system: ActorSystem, + m: Manifest[T], + formats: Formats + ): Source[(T, ScrollMetrics), NotUsed] = { + scroll(sql, config).map { row => + (convertTo[T](row._1)(m, formats), row._2) + } + } + + // ======================================================================== + // PRIVATE METHODS + // ======================================================================== + + /** Determine the best scroll strategy based on the query + */ + private def determineScrollStrategy( + elasticQuery: ElasticQuery, + aggregations: Map[String, SQLAggregation] + ): ScrollStrategy = { + // If aggregations are present, use classic scrolling + if (aggregations.nonEmpty) { + UseScroll + } else { + // Check if the query contains aggregations in the JSON + if (hasAggregations(elasticQuery.query)) { + UseScroll + } else { + // Detect version and choose implementation + version match { + case result.ElasticSuccess(v) => + if (ElasticsearchVersion.supportsPit(v)) { + logger.info(s"ES version $v supports PIT, using pitSearchAfterSource") + UsePIT + } else { + logger.info(s"ES version $v does not support PIT, using classic search_after") + UseSearchAfter + } + case result.ElasticFailure(err) => + throw new RuntimeException(s"Failed to get ES version: $err") + } + } + } + } + + /** Scroll with metrics tracking + */ + private def 
scrollWithMetrics( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): Source[(Map[String, Any], ScrollMetrics), NotUsed] = { + + implicit val ec: ExecutionContext = system.dispatcher + + val metricsPromise = Promise[ScrollMetrics]() + + scroll(elasticQuery, fieldAliases, aggregations, config, hasSorts) + .take(config.maxDocuments.getOrElse(Long.MaxValue)) + .grouped(config.scrollSize) + .statefulMapConcat { () => + var metrics = config.metrics // Thread-safe as statefulMapConcat is single-threaded + batch => { + metrics = metrics.copy( + totalDocuments = metrics.totalDocuments + batch.size, + totalBatches = metrics.totalBatches + 1 + ) + + if (metrics.totalBatches % config.logEvery == 0) { + logger.info( + s"Scroll progress: ${metrics.totalDocuments} docs, " + + s"${metrics.totalBatches} batches, " + + s"${metrics.documentsPerSecond} docs/sec" + ) + } + batch.map(doc => (doc, metrics)) + } + + } + .alsoTo(Sink.last.mapMaterializedValue { lastFuture => + lastFuture + .map(_._2) + .onComplete { + case Success(finalMetrics) => + val completed = finalMetrics.complete + logger.info( + s"Scroll completed: ${completed.totalDocuments} docs in ${completed.duration}ms " + + s"(${completed.documentsPerSecond} docs/sec)" + ) + metricsPromise.success(completed) + case Failure(ex) => + logger.error("Failed to get final metrics", ex) + metricsPromise.failure(ex) + }(system.dispatcher) + }) + .mapMaterializedValue(_ => NotUsed) + } + + private def hasAggregations(query: String): Boolean = { + try { + val json = parse(query) + (json \ "aggregations") != JNothing || (json \ "aggs") != JNothing + } catch { + case _: Exception => false + } + } + + /** Create a scrolling source for JSON query with automatic strategy + */ + private def scroll( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation], + config: ScrollConfig, + hasSorts: Boolean + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + val strategy = determineScrollStrategy(elasticQuery, aggregations) + + logger.info( + s"Using scroll strategy: $strategy for query on ${elasticQuery.indices.mkString(", ")}" + ) + + strategy match { + case UseScroll => + logger.info("Using classic scroll (supports aggregations)") + scrollClassic(elasticQuery, fieldAliases, aggregations, config) + + case UseSearchAfter if config.preferSearchAfter => + logger.info("Using search_after (optimized for hits only)") + searchAfter(elasticQuery, fieldAliases, config, hasSorts) + + case UsePIT => + logger.info("Using PIT + search_after (optimized for hits only)") + pitSearchAfter(elasticQuery, fieldAliases, config, hasSorts) + + case _ => + logger.info("Falling back to classic scroll") + scrollClassic(elasticQuery, fieldAliases, aggregations, config) + } + } + +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala new file mode 100644 index 00000000..c844482e --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/SearchApi.scala @@ -0,0 +1,938 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.{ + ElasticError, + ElasticFailure, + ElasticResult, + ElasticSuccess +} +import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLQuery, SQLSearchRequest} +import com.google.gson.{Gson, JsonElement, JsonObject, JsonParser} +import org.json4s.Formats + +import scala.concurrent.{ExecutionContext, Future} +import scala.jdk.CollectionConverters._ +import scala.reflect.{classTag, ClassTag} +import scala.util.{Failure, Success, Try} + +//format:off +/** Elasticsearch search API with unified error handling via ElasticResult. + * + * @example + * {{{ + * class MyClient extends SearchApi { + * // Implementation of abstract methods + * } + * + * val client = new MyClient() + * val result = client.searchAs[User]("SELECT * FROM users WHERE age > 30") + * }}} + */ +//format:on +trait SearchApi extends ElasticConversion with ElasticClientHelpers { + + // ======================================================================== + // PUBLIC METHODS + // ======================================================================== + + /** Search for documents / aggregations matching the SQL query. + * + * @param sql + * the SQL query to execute + * @return + * the Elasticsearch response + */ + def search(sql: SQLQuery): ElasticResult[ElasticResponse] = { + sql.request match { + case Some(Left(single)) => + val elasticQuery = ElasticQuery( + single, + collection.immutable.Seq(single.sources: _*) + ) + singleSearch(elasticQuery, single.fieldAliases, single.sqlAggregations) + + case Some(Right(multiple)) => + val elasticQueries = ElasticQueries( + multiple.requests.map { query => + ElasticQuery( + query, + collection.immutable.Seq(query.sources: _*) + ) + }.toList + ) + multiSearch(elasticQueries, multiple.fieldAliases, multiple.sqlAggregations) + + case None => + logger.error( + s"❌ Failed to execute search for query '${sql.query}'" + ) + ElasticResult.failure( + ElasticError( + message = s"SQL query does not contain a valid search request: ${sql.query}", + operation = Some("search") + ) + ) + } + } + + /** Search for documents / aggregations matching the Elasticsearch query. 
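+    *
+    * For illustration, this is exactly what `search` delegates to for a single parsed
+    * request (`single` being an `SQLSearchRequest`):
+    *
+    * {{{
+    * val elasticQuery = ElasticQuery(single, collection.immutable.Seq(single.sources: _*))
+    * client.singleSearch(elasticQuery, single.fieldAliases, single.sqlAggregations)
+    * }}}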
+ * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @return + * the Elasticsearch response + */ + def singleSearch( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): ElasticResult[ElasticResponse] = { + validateJson("search", elasticQuery.query) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid query: ${error.message}", + statusCode = Some(400), + index = Some(elasticQuery.indices.mkString(",")), + operation = Some("search") + ) + ) + case None => // continue + } + + logger.debug( + s"Searching with query '${elasticQuery.query}' in indices '${elasticQuery.indices.mkString(",")}'" + ) + + executeSingleSearch(elasticQuery) match { + case ElasticSuccess(Some(response)) => + logger.info( + s"✅ Successfully executed search in indices '${elasticQuery.indices.mkString(",")}'" + ) + ElasticResult.success( + ElasticResponse( + elasticQuery.query, + response, + fieldAliases, + aggregations.map(kv => kv._1 -> kv._2) + ) + ) + case ElasticSuccess(_) => + val error = + ElasticError( + message = + s"Failed to execute search in indices '${elasticQuery.indices.mkString(",")}'", + index = Some(elasticQuery.indices.mkString(",")), + operation = Some("search") + ) + logger.error(s"❌ ${error.message}") + ElasticResult.failure(error) + case ElasticFailure(error) => + logger.error( + s"❌ Failed to execute search in indices '${elasticQuery.indices.mkString(",")}': ${error.message}" + ) + ElasticResult.failure( + error.copy( + operation = Some("search"), + index = Some(elasticQuery.indices.mkString(",")) + ) + ) + } + + } + + /** Multi-search with Elasticsearch queries. 
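+    *
+    * Mirrors what `search` does for a parsed multi-request (`multiple` being the parsed
+    * multi-search request):
+    *
+    * {{{
+    * val elasticQueries = ElasticQueries(
+    *   multiple.requests.map(q => ElasticQuery(q, collection.immutable.Seq(q.sources: _*))).toList
+    * )
+    * client.multiSearch(elasticQueries, multiple.fieldAliases, multiple.sqlAggregations)
+    * }}}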
+ * + * @param elasticQueries + * Elasticsearch queries + * @param fieldAliases + * field aliases + * @param aggregations + * SQL aggregations + * @return + * the combined Elasticsearch response + */ + def multiSearch( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): ElasticResult[ElasticResponse] = { + elasticQueries.queries.flatMap { elasticQuery => + validateJson("search", elasticQuery.query).map(error => + elasticQuery.indices.mkString(",") -> error.message + ) + } match { + case Nil => // continue + case errors => + return ElasticResult.failure( + ElasticError( + message = s"Invalid queries: ${errors.map(_._2).mkString(",")}", + statusCode = Some(400), + index = Some(errors.map(_._1).mkString(",")), + operation = Some("multiSearch") + ) + ) + } + + logger.debug( + s"Multi-searching with ${elasticQueries.queries.size} queries" + ) + + executeMultiSearch(elasticQueries) match { + case ElasticSuccess(Some(response)) => + logger.info( + s"✅ Successfully executed multi-search with ${elasticQueries.queries.size} queries" + ) + ElasticResult.success( + ElasticResponse( + elasticQueries.queries.map(_.query).mkString("\n"), + response, + fieldAliases, + aggregations.map(kv => kv._1 -> kv._2) + ) + ) + case ElasticSuccess(_) => + val error = + ElasticError( + message = s"Failed to execute multi-search with ${elasticQueries.queries.size} queries", + operation = Some("multiSearch") + ) + logger.error(s"❌ ${error.message}") + ElasticResult.failure(error) + case ElasticFailure(error) => + logger.error( + s"❌ Failed to execute multi-search with ${elasticQueries.queries.size} queries: ${error.message}" + ) + ElasticResult.failure( + error.copy( + operation = Some("multiSearch") + ) + ) + } + } + + // ======================================================================== + // ASYNCHRONOUS SEARCH METHODS + // ======================================================================== + + /** Asynchronous search for documents / aggregations matching the SQL query. + * + * @param sqlQuery + * the SQL query + * @return + * a Future containing the Elasticsearch response + */ + def searchAsync( + sqlQuery: SQLQuery + )(implicit + ec: ExecutionContext + ): Future[ElasticResult[ElasticResponse]] = { + sqlQuery.request match { + case Some(Left(single)) => + val elasticQuery = ElasticQuery( + single, + collection.immutable.Seq(single.sources: _*) + ) + singleSearchAsync(elasticQuery, single.fieldAliases, single.sqlAggregations) + + case Some(Right(multiple)) => + val elasticQueries = ElasticQueries( + multiple.requests.map { query => + ElasticQuery( + query, + collection.immutable.Seq(query.sources: _*) + ) + }.toList + ) + multiSearchAsync(elasticQueries, multiple.fieldAliases, multiple.sqlAggregations) + + case None => + logger.error( + s"❌ Failed to execute asynchronous search for query '${sqlQuery.query}'" + ) + Future.successful( + ElasticResult.failure( + ElasticError( + message = s"SQL query does not contain a valid search request: ${sqlQuery.query}", + operation = Some("searchAsync") + ) + ) + ) + } + } + + /** Asynchronous search for documents / aggregations matching the Elasticsearch query. 
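+ *
+ * Sketch (illustrative; assumes an implicit `ExecutionContext` in scope):
+ * {{{
+ * singleSearchAsync(elasticQuery, Map.empty, Map.empty).map {
+ *   case ElasticSuccess(response) => // consume the ElasticResponse
+ *   case ElasticFailure(error)    => logger.error(error.message)
+ * }
+ * }}}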
+ * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @return + * a Future containing the Elasticsearch response + */ + def singleSearchAsync( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit + ec: ExecutionContext + ): Future[ElasticResult[ElasticResponse]] = { + executeSingleSearchAsync(elasticQuery).flatMap { + case ElasticSuccess(Some(response)) => + logger.info( + s"✅ Successfully executed asynchronous search for query '${elasticQuery.query}'" + ) + Future.successful( + ElasticResult.success( + ElasticResponse( + elasticQuery.query, + response, + fieldAliases, + aggregations.map(kv => kv._1 -> kv._2) + ) + ) + ) + case ElasticSuccess(_) => + val error = + ElasticError( + message = s"Failed to execute asynchronous search for query '${elasticQuery.query}'", + index = Some(elasticQuery.indices.mkString(",")), + operation = Some("searchAsync") + ) + logger.error(s"❌ ${error.message}") + Future.successful(ElasticResult.failure(error)) + case ElasticFailure(error) => + logger.error( + s"❌ Failed to execute asynchronous search for query '${elasticQuery.query}': ${error.message}" + ) + Future.successful( + ElasticResult.failure( + error.copy( + operation = Some("searchAsync"), + index = Some(elasticQuery.indices.mkString(",")) + ) + ) + ) + } + } + + /** Asynchronous multi-search with Elasticsearch queries. + * + * @param elasticQueries + * the Elasticsearch queries + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @return + * a Future containing the combined Elasticsearch response + */ + def multiSearchAsync( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit + ec: ExecutionContext + ): Future[ElasticResult[ElasticResponse]] = { + executeMultiSearchAsync(elasticQueries).flatMap { + case ElasticSuccess(Some(response)) => + logger.info( + s"✅ Successfully executed asynchronous multi-search with ${elasticQueries.queries.size} queries" + ) + Future.successful( + ElasticResult.success( + ElasticResponse( + elasticQueries.queries.map(_.query).mkString("\n"), + response, + fieldAliases, + aggregations.map(kv => kv._1 -> kv._2) + ) + ) + ) + case ElasticSuccess(_) => + val error = + ElasticError( + message = + s"Failed to execute asynchronous multi-search with ${elasticQueries.queries.size} queries", + operation = Some("multiSearchAsync") + ) + logger.error(s"❌ ${error.message}") + Future.successful(ElasticResult.failure(error)) + case ElasticFailure(error) => + logger.error( + s"❌ Failed to execute asynchronous multi-search with ${elasticQueries.queries.size} queries: ${error.message}" + ) + Future.successful( + ElasticResult.failure( + error.copy( + operation = Some("multiSearchAsync") + ) + ) + ) + } + } + + // ======================================================================== + // SEARCH METHODS WITH CONVERSION + // ======================================================================== + + /** Searches and converts results into typed entities from an SQL query. 
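+ *
+ * Example (illustrative; assumes an implicit json4s `Formats` in scope and that the
+ * SQL string can be wrapped in a `SQLQuery`):
+ * {{{
+ * case class User(name: String, age: Int)
+ * val users: ElasticResult[Seq[User]] =
+ *   searchAs[User](SQLQuery("SELECT * FROM users WHERE age > 30"))
+ * }}}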
+ * + * @param sqlQuery + * the SQL query containing fieldAliases and aggregations + * @tparam U + * the type of entities to return + * @return + * the entities matching the query + */ + def searchAs[U]( + sqlQuery: SQLQuery + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] = { + for { + response <- search(sqlQuery) + entities <- convertToEntities[U](response) + } yield entities + } + + /** Searches and converts results into typed entities. + * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * the entities matching the query + */ + def singleSearchAs[U]( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit + m: Manifest[U], + formats: Formats + ): ElasticResult[Seq[U]] = { + for { + response <- singleSearch(elasticQuery, fieldAliases, aggregations) + entities <- convertToEntities[U](response) + } yield entities + } + + /** Multi-search with conversion to typed entities. + * + * @param elasticQueries + * the Elasticsearch queries + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * the entities matching the queries + */ + def multisearchAs[U]( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] = { + for { + response <- multiSearch(elasticQueries, fieldAliases, aggregations) + entities <- convertToEntities[U](response) + } yield entities + } + + // ======================================================================== + // ASYNCHRONOUS SEARCH METHODS WITH CONVERSION + // ======================================================================== + + /** Asynchronous search with conversion to typed entities. + * + * @param sqlQuery + * the SQL query + * @tparam U + * the type of entities to return + * @return + * a Future containing the entities + */ + def searchAsyncAs[U]( + sqlQuery: SQLQuery + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Seq[U]]] = { + searchAsync(sqlQuery).flatMap { + case ElasticFailure(error) => + logger.error( + s"❌ Failed to execute asynchronous search for query '${sqlQuery.query}': ${error.message}" + ) + Future.successful(ElasticResult.failure(error)) + case ElasticSuccess(response) => + logger.info( + s"✅ Successfully executed asynchronous search for query '${sqlQuery.query}'" + ) + Future.successful(convertToEntities[U](response)) + } + } + + /** Asynchronous search with conversion to typed entities. 
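+ *
+ * Sketch (illustrative; assumes implicit `ExecutionContext`, `Manifest[User]` and
+ * `Formats` in scope):
+ * {{{
+ * singleSearchAsyncAs[User](elasticQuery, Map.empty, Map.empty).foreach {
+ *   case ElasticSuccess(users) => users.foreach(println)
+ *   case ElasticFailure(error) => logger.error(error.message)
+ * }
+ * }}}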
+ * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * a Future containing the entities + */ + def singleSearchAsyncAs[U]( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Seq[U]]] = { + singleSearchAsync(elasticQuery, fieldAliases, aggregations).flatMap { + case ElasticFailure(error) => + logger.error( + s"❌ Failed to execute asynchronous search for query '${elasticQuery.query}': ${error.message}" + ) + Future.successful(ElasticResult.failure(error)) + case ElasticSuccess(response) => + logger.info( + s"✅ Successfully executed asynchronous search for query '${elasticQuery.query}'" + ) + Future.successful(convertToEntities[U](response)) + } + } + + /** Asynchronous multi-search with conversion to typed entities. + * + * @param elasticQueries + * the Elasticsearch queries + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * a Future containing the entities + */ + def multiSearchAsyncAs[U]( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Seq[U]]] = { + multiSearchAsync(elasticQueries, fieldAliases, aggregations).flatMap { + case ElasticFailure(error) => + logger.error( + s"❌ Failed to execute asynchronous multi-search with ${elasticQueries.queries.size} queries: ${error.message}" + ) + Future.successful(ElasticResult.failure(error)) + case ElasticSuccess(response) => + logger.info( + s"✅ Successfully executed asynchronous multi-search with ${elasticQueries.queries.size} queries" + ) + Future.successful(convertToEntities[U](response)) + } + } + + // ======================================================================== + // SEARCH METHODS WITH INNER HITS + // ======================================================================== + + @deprecated("Use `search` instead.", "v0.10") + /** Search with inner hits from an SQL query. + * + * @deprecated + * Use `search` instead. 
+ * @param sql
+ * the SQL query
+ * @param innerField
+ * the field for inner hits
+ * @tparam U
+ * the type of the main entity
+ * @tparam I
+ * the type of inner hits
+ * @return
+ * tuples (main entity, inner hits)
+ */
+ def searchWithInnerHits[U: Manifest: ClassTag, I: Manifest: ClassTag](
+ sql: SQLQuery,
+ innerField: String
+ )(implicit
+ formats: Formats
+ ): ElasticResult[Seq[(U, Seq[I])]] = {
+ sql.request match {
+ case Some(Left(single)) =>
+ val elasticQuery = ElasticQuery(
+ single,
+ collection.immutable.Seq(single.sources: _*)
+ )
+ singleSearchWithInnerHits[U, I](elasticQuery, innerField)
+
+ case Some(Right(multiple)) =>
+ val elasticQueries = ElasticQueries(
+ multiple.requests.map { query =>
+ ElasticQuery(
+ query,
+ collection.immutable.Seq(query.sources: _*)
+ )
+ }.toList
+ )
+ multisearchWithInnerHits[U, I](elasticQueries, innerField)
+
+ case None =>
+ logger.error(
+ s"❌ Failed to execute search with inner hits for query '${sql.query}'"
+ )
+ ElasticResult.failure(
+ ElasticError(
+ message = s"SQL query does not contain a valid search request: ${sql.query}",
+ operation = Some("searchWithInnerHits")
+ )
+ )
+ }
+ }
+
+ @deprecated("Use `search` instead.", "v0.10")
+ /** Search with inner hits from an Elasticsearch query.
+ *
+ * @deprecated
+ * Use `search` instead.
+ * @param elasticQuery
+ * the Elasticsearch query
+ * @param innerField
+ * the field for inner hits
+ * @tparam U
+ * the type of the main entity
+ * @tparam I
+ * the type of inner hits
+ * @return
+ * tuples (main entity, inner hits)
+ */
+ def singleSearchWithInnerHits[U: Manifest: ClassTag, I: Manifest: ClassTag](
+ elasticQuery: ElasticQuery,
+ innerField: String
+ )(implicit
+ formats: Formats
+ ): ElasticResult[Seq[(U, Seq[I])]] = {
+ validateJson("search", elasticQuery.query) match {
+ case Some(error) =>
+ return ElasticResult.failure(
+ error.copy(
+ message = s"Invalid query: ${error.message}",
+ statusCode = Some(400),
+ index = Some(elasticQuery.indices.mkString(",")),
+ operation = Some("singleSearchWithInnerHits")
+ )
+ )
+ case None => // continue
+ }
+
+ logger.debug(
+ s"🔍 Searching inner hits with query '${elasticQuery.query}' in indices '${elasticQuery.indices
+ .mkString(",")}'"
+ )
+
+ executeSingleSearch(elasticQuery) match {
+ case ElasticSuccess(Some(response)) =>
+ logger.info(
+ s"✅ Successfully executed search with inner hits in indices '${elasticQuery.indices.mkString(",")}'"
+ )
+ ElasticResult.attempt {
+ new JsonParser().parse(response).getAsJsonObject
+ } match {
+ case ElasticFailure(error) =>
+ logger.error(
+ s"❌ Failed to parse Elasticsearch response for search with inner hits in indices '${elasticQuery.indices
+ .mkString(",")}': ${error.message}"
+ )
+ ElasticResult.failure(
+ error.copy(
+ operation = Some("singleSearchWithInnerHits"),
+ index = Some(elasticQuery.indices.mkString(","))
+ )
+ )
+ case ElasticSuccess(parsedResponse) =>
+ ElasticResult.attempt(parseInnerHits[U, I](parsedResponse, innerField))
+ }
+ case ElasticSuccess(_) =>
+ val error =
+ ElasticError(
+ message =
+ s"Failed to execute search with inner hits in indices '${elasticQuery.indices.mkString(",")}'",
+ index = Some(elasticQuery.indices.mkString(",")),
+ operation = Some("singleSearchWithInnerHits")
+ )
+ logger.error(s"❌ ${error.message}")
+ ElasticResult.failure(error)
+ case ElasticFailure(error) =>
+ logger.error(
+ s"❌ Failed to execute search with inner hits in indices '${elasticQuery.indices
+ .mkString(",")}': ${error.message}"
+ )
+ ElasticResult.failure(
+ error.copy(
+ operation = Some("singleSearchWithInnerHits"),
+ index = Some(elasticQuery.indices.mkString(","))
+ )
+ )
+ }
+ }
+
+ @deprecated("Use `multiSearch` instead.", "v0.10")
+ /** Multisearch with inner hits from Elasticsearch queries.
+ *
+ * @deprecated
+ * Use `multiSearch` instead.
+ * @param elasticQueries
+ * the Elasticsearch queries
+ * @param innerField
+ * the field for inner hits
+ * @tparam U
+ * the type of the main entity
+ * @tparam I
+ * the type of inner hits
+ * @return
+ * a sequence of results with inner hits
+ */
+ def multisearchWithInnerHits[U: Manifest: ClassTag, I: Manifest: ClassTag](
+ elasticQueries: ElasticQueries,
+ innerField: String
+ )(implicit
+ formats: Formats
+ ): ElasticResult[Seq[(U, Seq[I])]] = {
+ elasticQueries.queries.flatMap { elasticQuery =>
+ validateJson("search", elasticQuery.query).map(error =>
+ elasticQuery.indices.mkString(",") -> error.message
+ )
+ } match {
+ case Nil => // continue
+ case errors =>
+ return ElasticResult.failure(
+ ElasticError(
+ message = s"Invalid queries: ${errors.map(_._2).mkString(",")}",
+ statusCode = Some(400),
+ index = Some(errors.map(_._1).mkString(",")),
+ operation = Some("multisearchWithInnerHits")
+ )
+ )
+ }
+
+ logger.debug(
+ s"🔍 Multi-searching inner hits with ${elasticQueries.queries.size} queries"
+ )
+
+ executeMultiSearch(elasticQueries) match {
+ case ElasticSuccess(Some(response)) =>
+ logger.info(
+ s"✅ Successfully executed multi-search inner hits with ${elasticQueries.queries.size} queries"
+ )
+ ElasticResult.attempt {
+ new JsonParser().parse(response).getAsJsonObject
+ } match {
+ case ElasticFailure(error) =>
+ logger.error(
+ s"❌ Failed to parse Elasticsearch response for multi-search inner hits with ${elasticQueries.queries.size} queries: ${error.message}"
+ )
+ ElasticResult.failure(
+ error.copy(
+ operation = Some("multisearchWithInnerHits")
+ )
+ )
+ case ElasticSuccess(parsedResponse) =>
+ ElasticResult.attempt(parseInnerHits[U, I](parsedResponse, innerField))
+ }
+ case ElasticSuccess(_) =>
+ val error =
+ ElasticError(
+ message =
+ s"Failed to execute multi-search inner hits with ${elasticQueries.queries.size} queries",
+ operation = Some("multisearchWithInnerHits")
+ )
+ logger.error(s"❌ ${error.message}")
+ ElasticResult.failure(error)
+ case ElasticFailure(error) =>
+ logger.error(
+ s"❌ Failed to execute multi-search inner hits with ${elasticQueries.queries.size} queries: ${error.message}"
+ )
+ ElasticResult.failure(
+ error.copy(
+ operation = Some("multisearchWithInnerHits")
+ )
+ )
+ }
+ }
+
+ // ========================================================================
+ // METHODS TO IMPLEMENT
+ // ========================================================================
+
+ private[client] def executeSingleSearch(
+ elasticQuery: ElasticQuery
+ ): ElasticResult[Option[String]]
+
+ private[client] def executeMultiSearch(
+ elasticQueries: ElasticQueries
+ ): ElasticResult[Option[String]]
+
+ private[client] def executeSingleSearchAsync(
+ elasticQuery: ElasticQuery
+ )(implicit
+ ec: ExecutionContext
+ ): Future[ElasticResult[Option[String]]]
+
+ private[client] def executeMultiSearchAsync(
+ elasticQueries: ElasticQueries
+ )(implicit
+ ec: ExecutionContext
+ ): Future[ElasticResult[Option[String]]]
+
+ // ================================================================================
+ // IMPLICIT CONVERSIONS
+ // ================================================================================
+
+ /** Implicit conversion of an SQL query to Elasticsearch
JSON. Used for query serialization. + * + * @param sqlSearch + * the SQL search request to convert + * @return + * JSON string representation of the query + */ + private[client] implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String + + private def parseInnerHits[M: Manifest: ClassTag, I: Manifest: ClassTag]( + searchResult: JsonObject, + innerField: String + )(implicit formats: Formats): Seq[(M, Seq[I])] = { + val mManifest = implicitly[Manifest[M]] + val iManifest = implicitly[Manifest[I]] + val mClass = classTag[M].runtimeClass + val iClass = classTag[I].runtimeClass + + logger.info( + s"🔍 Processing inner hits with types: M=${mClass.getSimpleName}, I=${iClass.getSimpleName}" + ) + + def innerHits(result: JsonElement) = { + result.getAsJsonObject + .get("inner_hits") + .getAsJsonObject + .get(innerField) + .getAsJsonObject + .get("hits") + .getAsJsonObject + .get("hits") + .getAsJsonArray + .iterator() + } + + val gson = new Gson() + val results = searchResult.get("hits").getAsJsonObject.get("hits").getAsJsonArray.iterator() + + (for (result <- results.asScala) + yield ( + result match { + case obj: JsonObject => + Try { + val source = gson.toJson(obj.get("_source")) + logger.debug( + s"Deserializing main entity ${mClass.getSimpleName} from source: $source" + ) + serialization.read[M](source)(formats, mManifest) + } match { + case Success(s) => s + case Failure(f) => + logger.error(s"❌ Failed to deserialize main entity: ${f.getMessage}", f) + throw f + } + case _ => serialization.read[M](result.getAsString)(formats, mManifest) + }, + (for (innerHit <- innerHits(result).asScala) yield innerHit match { + case obj: JsonObject => + Try { + val source = gson.toJson(obj.get("_source")) + logger.debug( + s"Deserializing inner hit entity ${iClass.getSimpleName} from source: $source" + ) + serialization.read[I](source)(formats, iManifest) + } match { + case Success(s) => s + case Failure(f) => + logger.error(s"❌ Failed to deserialize inner hit entity: ${f.getMessage}") + throw f + } + case _ => serialization.read[I](innerHit.getAsString)(formats, iManifest) + }).toList + )).toList + } + + // ======================================================================== + // PRIVATE HELPERS + // ======================================================================== + + /** Converts an Elasticsearch response to typed entities. + * + * @param response + * the Elasticsearch response + * @tparam U + * the type of entities to convert to + * @return + * ElasticResult containing the entities or an error + */ + private def convertToEntities[U]( + response: ElasticResponse + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] = { + val results = ElasticResult.fromTry(convertTo[U](response)) + results + .fold( + onFailure = error => + ElasticResult.failure( + ElasticError( + message = s"Failed to convert search results to ${m.runtimeClass.getSimpleName}", + cause = error.cause, + operation = Some("convertToEntities") + ) + ), + onSuccess = entities => ElasticResult.success(entities) + ) + } + +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/SettingsApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/SettingsApi.scala new file mode 100644 index 00000000..8a10f117 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/SettingsApi.scala @@ -0,0 +1,187 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package app.softnetwork.elastic.client
+
+import app.softnetwork.elastic.client.result.{
+ ElasticError,
+ ElasticFailure,
+ ElasticResult,
+ ElasticSuccess
+}
+import com.google.gson.JsonParser
+
+/** Settings management API.
+ */
+trait SettingsApi { _: IndicesApi =>
+
+ // ========================================================================
+ // PUBLIC METHODS
+ // ========================================================================
+
+ /** Toggle the refresh interval of an index.
+ * @param index
+ * - the name of the index
+ * @param enable
+ * - true to enable the refresh interval, false to disable it
+ * @return
+ * true if the settings were updated successfully, false otherwise
+ */
+ def toggleRefresh(index: String, enable: Boolean): ElasticResult[Boolean] = {
+ val refreshValue = if (enable) "1s" else "-1"
+ updateSettings(index, s"""{"index": {"refresh_interval": "$refreshValue"}}""")
+ }
+
+ /** Set the number of replicas for an index.
+ * @param index
+ * - the name of the index
+ * @param replicas
+ * - the number of replicas to set
+ * @return
+ * true if the settings were updated successfully, false otherwise
+ */
+ def setReplicas(index: String, replicas: Int): ElasticResult[Boolean] = {
+ updateSettings(index, s"""{"index" : {"number_of_replicas" : $replicas} }""")
+ }
+
+ /** Update index settings.
+ * @param index
+ * - the name of the index
+ * @param settings
+ * - the settings to apply to the index (default is defaultSettings)
+ * @return
+ * true if the settings were updated successfully, false otherwise
+ */
+ def updateSettings(index: String, settings: String = defaultSettings): ElasticResult[Boolean] = {
+ validateIndexName(index) match {
+ case Some(error) =>
+ return ElasticFailure(
+ error.copy(
+ message = s"Invalid index: ${error.message}",
+ statusCode = Some(400),
+ index = Some(index),
+ operation = Some("updateSettings")
+ )
+ )
+ case None => // OK
+ }
+
+ validateJsonSettings(settings) match {
+ case Some(error) =>
+ return ElasticFailure(
+ error.copy(
+ message = s"Invalid settings: ${error.message}",
+ statusCode = Some(400),
+ index = Some(index),
+ operation = Some("updateSettings")
+ )
+ )
+ case None => // OK
+ }
+
+ logger.debug(s"🔧 Updating settings for index $index: $settings")
+
+ closeIndex(index) match {
+ case failure @ ElasticFailure(error) =>
+ logger.error(
+ s"❌ Closing index $index failed, settings for index '$index' will not be updated: ${error.message}"
+ )
+ failure
+ case ElasticSuccess(false) =>
+ val error = ElasticError(
+ message = s"Closing index '$index' failed, settings for index '$index' will not be updated",
+ operation = Some("updateSettings"),
+ index = Some(index)
+ )
+ logger.error(s"❌ ${error.message}")
+ ElasticResult.failure(error)
+ case ElasticSuccess(true) =>
+ executeUpdateSettings(index, settings) match {
+ case failure @ ElasticFailure(error) =>
+ logger.error(s"❌ Updating settings for index '$index' failed: ${error.message}")
+ failure
+ case ElasticSuccess(false) =>
+ ElasticResult.failure(
+ ElasticError(
+ message = s"❌ Updating settings for index '$index' failed",
+ operation = Some("updateSettings"),
+ index = Some(index)
+ )
+ )
+ case _ =>
+ logger.info(s"✅ Updating settings for index '$index' succeeded")
+ openIndex(index)
+ }
+ }
+ }
+
+ /** Load the settings of an index.
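+ * Example (illustrative; the returned JSON is the content of the index's
+ * `settings.index` object):
+ * {{{
+ * loadSettings("users") match {
+ *   case ElasticSuccess(json)  => // e.g. {"number_of_replicas":"1","refresh_interval":"1s"}
+ *   case ElasticFailure(error) => logger.error(error.message)
+ * }
+ * }}}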
+ * @param index
+ * - the name of the index to load the settings for
+ * @return
+ * the settings of the index as a JSON string
+ */
+ def loadSettings(index: String): ElasticResult[String] = {
+ validateIndexName(index) match {
+ case Some(error) =>
+ return ElasticFailure(
+ error.copy(
+ message = s"Invalid index: ${error.message}",
+ statusCode = Some(400),
+ index = Some(index),
+ operation = Some("loadSettings")
+ )
+ )
+ case None => // OK
+ }
+
+ logger.debug(s"🔍 Loading settings for index $index")
+
+ executeLoadSettings(index).flatMap { jsonString =>
+ // ✅ Extracting settings from JSON
+ ElasticResult.attempt(
+ new JsonParser().parse(jsonString).getAsJsonObject
+ ) match {
+ case ElasticFailure(error) =>
+ logger.error(s"❌ Failed to parse JSON settings for index '$index': ${error.message}")
+ ElasticFailure(error.copy(operation = Some("loadSettings")))
+ case ElasticSuccess(indexObj) =>
+ if (Option(indexObj).isDefined && indexObj.has(index)) {
+ val settingsObj = indexObj
+ .getAsJsonObject(index)
+ .getAsJsonObject("settings")
+ .getAsJsonObject("index")
+ ElasticSuccess(settingsObj.toString)
+ } else {
+ val message = s"Index '$index' not found in the loaded settings."
+ logger.error(s"❌ $message")
+ ElasticFailure(
+ ElasticError(
+ message = message,
+ operation = Some("loadSettings"),
+ index = Some(index)
+ )
+ )
+ }
+ }
+ }
+ }
+
+ // ========================================================================
+ // METHODS TO IMPLEMENT
+ // ========================================================================
+
+ private[client] def executeUpdateSettings(
+ index: String,
+ settings: String
+ ): ElasticResult[Boolean]
+
+ private[client] def executeLoadSettings(
+ index: String
+ ): ElasticResult[String]
+}
diff --git a/core/src/main/scala/app/softnetwork/elastic/client/UpdateApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/UpdateApi.scala
new file mode 100644
index 00000000..92e81297
--- /dev/null
+++ b/core/src/main/scala/app/softnetwork/elastic/client/UpdateApi.scala
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2025 SOFTNETWORK
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package app.softnetwork.elastic.client
+
+import app.softnetwork.elastic.client.result.{
+ ElasticError,
+ ElasticFailure,
+ ElasticResult,
+ ElasticSuccess
+}
+import org.json4s.Formats
+
+import scala.concurrent.{ExecutionContext, Future, Promise}
+import scala.reflect.ClassTag
+import scala.util.{Failure, Success}
+
+/** Update Management API
+ */
+trait UpdateApi extends ElasticClientHelpers { _: RefreshApi with SerializationApi =>
+
+ // ========================================================================
+ // PUBLIC METHODS
+ // ========================================================================
+
+ /** Update an entity in the given index.
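+ * Example (illustrative):
+ * {{{
+ * // upsert semantics: the document is created if it does not exist yet
+ * update("users", "1", """{"name": "John Doe"}""", upsert = true)
+ * }}}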
+ * @param index + * - the name of the index to update the entity in + * @param id + * - the id of the entity to update + * @param source + * - the source of the entity to update in JSON format + * @param upsert + * - true to upsert the entity if it does not exist, false otherwise + * @return + * true if the entity was updated successfully, false otherwise + */ + def update(index: String, id: String, source: String, upsert: Boolean): ElasticResult[Boolean] = { + validateIndexName(index) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid index: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("update") + ) + ) + case None => // continue + } + + validateJson("update", source) match { + case Some(error) => + return ElasticResult.failure( + error.copy( + message = s"Invalid JSON source: ${error.message}", + statusCode = Some(400), + index = Some(index), + operation = Some("update") + ) + ) + case None => // continue + } + + logger.debug(s"Updating document with id '$id' in index '$index'") + + executeUpdate(index, id, source, upsert) match { + case ElasticSuccess(true) => + logger.info(s"✅ Successfully updated document with id '$id' in index '$index'") + this.refresh(index) + case ElasticSuccess(false) => + val error = s"Document with id '$id' in index '$index' not updated" + logger.warn(s"❌ $error") + ElasticResult.failure( + ElasticError( + message = error, + operation = Some("update"), + index = Some(index) + ) + ) + case failure @ ElasticFailure(error) => + logger.error( + s"❌ Failed to update document with id '$id' in index '$index': ${error.message}" + ) + failure + } + } + + /** Update an entity in the given index. + * @param entity + * - the entity to update + * @param id + * - the id of the entity to update + * @param index + * - the name of the index to update the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @param upsert + * - true to upsert the entity if it does not exist, false otherwise + * @return + * true if the entity was updated successfully, false otherwise + */ + def updateAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String] = None, + maybeType: Option[String] = None, + upsert: Boolean = true + )(implicit u: ClassTag[U], formats: Formats): ElasticResult[Boolean] = { + val indexType = maybeType.getOrElse(u.runtimeClass.getSimpleName.toLowerCase) + val indexName = index.getOrElse(indexType) + + ElasticResult + .attempt { + serialization.write[U](entity) + } + .flatMap { source => + this.update(indexName, id, source, upsert) + } + } + + /** Update an entity in the given index asynchronously. 
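+ * Sketch (illustrative; assumes an implicit `ExecutionContext` in scope):
+ * {{{
+ * updateAsync("users", "1", """{"name": "John Doe"}""", upsert = true).foreach {
+ *   case ElasticSuccess(_)     => logger.info("document updated")
+ *   case ElasticFailure(error) => logger.error(error.message)
+ * }
+ * }}}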
+ * @param index
+ * - the name of the index to update the entity in
+ * @param id
+ * - the id of the entity to update
+ * @param source
+ * - the source of the entity to update in JSON format
+ * @param upsert
+ * - true to upsert the entity if it does not exist, false otherwise
+ * @return
+ * a Future that completes with true if the entity was updated successfully, false otherwise
+ */
+ def updateAsync(index: String, id: String, source: String, upsert: Boolean)(implicit
+ ec: ExecutionContext
+ ): Future[ElasticResult[Boolean]] = {
+ validateIndexName(index) match {
+ case Some(error) =>
+ return Future.successful(
+ ElasticResult.failure(
+ error.copy(
+ message = s"Invalid index: ${error.message}",
+ statusCode = Some(400),
+ index = Some(index),
+ operation = Some("update")
+ )
+ )
+ )
+ case None => // continue
+ }
+
+ validateJson("update", source) match {
+ case Some(error) =>
+ return Future.successful(
+ ElasticResult.failure(
+ error.copy(
+ message = s"Invalid JSON source: ${error.message}",
+ statusCode = Some(400),
+ index = Some(index),
+ operation = Some("update")
+ )
+ )
+ )
+ case None => // continue
+ }
+
+ logger.debug(s"Updating document with id '$id' in index '$index' asynchronously")
+
+ val promise: Promise[ElasticResult[Boolean]] = Promise()
+ executeUpdateAsync(index, id, source, upsert) onComplete {
+ case Success(s) =>
+ s match {
+ case ElasticSuccess(true) =>
+ logger.info(s"✅ Successfully updated document with id '$id' in index '$index'")
+ promise.success(this.refresh(index))
+ case success @ ElasticSuccess(_) =>
+ logger.warn(s"❌ Document with id '$id' in index '$index' not updated")
+ promise.success(success)
+ case failure @ ElasticFailure(error) =>
+ logger.error(s"❌ ${error.message}")
+ promise.success(failure)
+ }
+ case Failure(exception) =>
+ val error = ElasticError(
+ message =
+ s"Exception while updating document with id '$id' in index '$index': ${exception.getMessage}",
+ operation = Some("updateAsync"),
+ index = Some(index)
+ )
+ logger.error(s"❌ ${error.message}")
+ promise.success(ElasticResult.failure(error))
+ }
+ promise.future
+ }
+
+ /** Update an entity in the given index asynchronously.
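+ * Example (illustrative; without an explicit index the lower-cased class name,
+ * here "user", is used):
+ * {{{
+ * case class User(name: String)
+ * updateAsyncAs(User("John"), id = "1")
+ * }}}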
+ * @param entity + * - the entity to update + * @param id + * - the id of the entity to update + * @param index + * - the name of the index to update the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @param upsert + * - true to upsert the entity if it does not exist, false otherwise + * @return + * a Future that completes with true if the entity was updated successfully, false otherwise + */ + def updateAsyncAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String] = None, + maybeType: Option[String] = None, + upsert: Boolean = true + )(implicit + u: ClassTag[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Boolean]] = { + val indexType = maybeType.getOrElse(u.runtimeClass.getSimpleName.toLowerCase) + val indexName = index.getOrElse(indexType) + + ElasticResult.attempt { + serialization.write[U](entity) + } match { + case failure @ ElasticFailure(_) => + logger.error(s"❌ Failed to serialize entity for update in index '$indexName'") + Future.successful(failure) + case ElasticSuccess(source) => this.updateAsync(indexName, id, source, upsert) + } + } + + // ======================================================================== + // METHODS TO IMPLEMENT + // ======================================================================== + + private[client] def executeUpdate( + index: String, + id: String, + source: String, + upsert: Boolean + ): ElasticResult[Boolean] + + private[client] def executeUpdateAsync( + index: String, + id: String, + source: String, + upsert: Boolean + )(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/VersionApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/VersionApi.scala new file mode 100644 index 00000000..2526c93c --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/VersionApi.scala @@ -0,0 +1,56 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticResult, ElasticSuccess} + +trait VersionApi extends ElasticClientHelpers { _: SerializationApi => + + // ======================================================================== + // PUBLIC METHODS + // ======================================================================== + + // Cache ES version (avoids calling it every time) + @volatile private var cachedVersion: Option[String] = None + + /** Get Elasticsearch version. 
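+ * The version is fetched once and then served from the cache on subsequent calls.
+ * {{{
+ * version match {
+ *   case ElasticSuccess(v)     => logger.info(s"running against Elasticsearch $v")
+ *   case ElasticFailure(error) => logger.error(error.message)
+ * }
+ * }}}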
+ * @return + * the Elasticsearch version + */ + def version: ElasticResult[String] = { + cachedVersion match { + case Some(version) => + ElasticSuccess(version) + case None => + executeVersion() match { + case success @ ElasticSuccess(version) => + logger.info(s"✅ Elasticsearch version: $version") + cachedVersion = Some(version) + success + case failure @ ElasticFailure(error) => + logger.error(s"❌ Failed to get Elasticsearch version: ${error.message}") + failure + } + } + } + + // ======================================================================== + // METHODS TO IMPLEMENT + // ======================================================================== + + private[client] def executeVersion(): ElasticResult[String] +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/bulk/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/bulk/package.scala new file mode 100644 index 00000000..1ef6ed7f --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/bulk/package.scala @@ -0,0 +1,271 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import akka.stream.{Attributes, FlowShape, Inlet, Outlet} +import akka.stream.stage.{GraphStage, GraphStageLogic} + +import scala.collection.mutable +import scala.concurrent.duration._ + +package object bulk { + + trait BulkTypes { + type BulkActionType + type BulkResultType + } + + object BulkAction extends Enumeration { + type BulkAction = Value + val INDEX: BulkAction.Value = Value(0, "INDEX") + val UPDATE: BulkAction.Value = Value(1, "UPDATE") + val DELETE: BulkAction.Value = Value(2, "DELETE") + } + + case class BulkItem( + index: String, + action: BulkAction.BulkAction, + document: String, + id: Option[String], + parent: Option[String] + ) + + /** Detailed result of a bulk operation */ + case class BulkResult( + successCount: Int, + successIds: Set[String], + failedCount: Int, + failedDocuments: Seq[FailedDocument], + indices: Set[String], + metrics: BulkMetrics + ) { + def successRate: Double = + if (successCount + failedCount > 0) + successCount.toDouble / (successCount + failedCount) * 100 + else 0.0 + + def hasFailures: Boolean = failedCount > 0 + } + + sealed trait DocumentResult { + def id: String + def index: String + } + + /** Document failed during bulk processing */ + case class FailedDocument( + id: String, + index: String, + document: String, + error: BulkError, + retryable: Boolean + ) extends DocumentResult + + case class SuccessfulDocument( + id: String, + index: String + ) extends DocumentResult + + /** Detailed error */ + case class BulkError( + message: String, + `type`: String, + status: Int, + causedBy: Option[BulkError] = None + ) { + def isRetryable: Boolean = status match { + case 429 | 503 | 504 => true // Too Many Requests, Service Unavailable, Gateway Timeout + case _ => false + } + } + + /** Bulk metrics */ + case class BulkMetrics( + startTime: Long = System.currentTimeMillis(), + endTime: 
Option[Long] = None,
+ totalBatches: Int = 0,
+ totalDocuments: Int = 0,
+ failuresByStatus: Map[Int, Int] = Map.empty,
+ failuresByType: Map[String, Int] = Map.empty
+ ) {
+ def durationMs: Long = endTime.getOrElse(System.currentTimeMillis()) - startTime
+
+ def throughput: Double =
+ if (durationMs > 0) totalDocuments * 1000.0 / durationMs
+ else 0.0
+
+ def complete: BulkMetrics = copy(endTime = Some(System.currentTimeMillis()))
+
+ def addFailure(error: BulkError): BulkMetrics = copy(
+ failuresByStatus =
+ failuresByStatus + (error.status -> (failuresByStatus.getOrElse(error.status, 0) + 1)),
+ failuresByType =
+ failuresByType + (error.`type` -> (failuresByType.getOrElse(error.`type`, 0) + 1))
+ )
+ }
+
+ /** Bulk Configuration */
+ case class BulkOptions(
+ defaultIndex: String,
+ defaultType: String = "_doc",
+ maxBulkSize: Int = 1000,
+ balance: Int = 1,
+ disableRefresh: Boolean = false,
+ retryOnFailure: Boolean = true,
+ maxRetries: Int = 3,
+ retryDelay: FiniteDuration = 1.second,
+ retryBackoffMultiplier: Double = 2.0,
+ enableMetrics: Boolean = true,
+ logEvery: Int = 10
+ )
+
+ /** Callbacks for bulk events */
+ case class BulkCallbacks(
+ onSuccess: (String, String) => Unit = (_, _) => (),
+ onFailure: FailedDocument => Unit = _ => (),
+ onComplete: BulkResult => Unit = _ => (),
+ onBatchComplete: (Int, BulkMetrics) => Unit = (_, _) => {}
+ )
+
+ object BulkCallbacks {
+ val default: BulkCallbacks = BulkCallbacks()
+
+ def logging(logger: org.slf4j.Logger): BulkCallbacks = BulkCallbacks(
+ onSuccess = (id, index) => logger.debug(s"✅ Document $id indexed in $index"),
+ onFailure =
+ failed => logger.error(s"❌ Document ${failed.id} failed: ${failed.error.message}"),
+ onComplete = result =>
+ logger.info(
+ s"📊 Bulk completed: ${result.successCount} successes, ${result.failedCount} failures " +
+ s"in ${result.metrics.durationMs}ms (${result.metrics.throughput} docs/sec)"
+ ),
+ onBatchComplete = (batchSize, metrics) =>
+ logger.info(s"📊 Batch completed: $batchSize docs (${metrics.throughput} docs/sec)")
+ )
+ }
+
+ trait BulkElasticAction { def index: String } // TODO rename to BulkItemIndex
+
+ trait BulkElasticResult { def items: List[BulkElasticResultItem] } // TODO remove
+
+ trait BulkElasticResultItem { def index: String } // TODO remove
+
+ case class BulkSettings[A](disableRefresh: Boolean = false)(implicit
+ settingsApi: SettingsApi,
+ toBulkElasticAction: A => BulkElasticAction
+ ) extends GraphStage[FlowShape[A, A]] {
+
+ val in: Inlet[A] = Inlet[A]("Filter.in")
+ val out: Outlet[A] = Outlet[A]("Filter.out")
+
+ val shape: FlowShape[A, A] = FlowShape.of(in, out)
+
+ override def createLogic(inheritedAttributes: Attributes): GraphStageLogic = {
+ new GraphStageLogic(shape) {
+ // Mutable state lives inside the logic so that each materialization
+ // of the stage gets its own set of already-configured indices.
+ val indices = mutable.Set.empty[String]
+
+ setHandler(
+ in,
+ () => {
+ val elem = grab(in)
+ val index = elem.index
+ if (!indices.contains(index)) {
+ if (disableRefresh) {
+ settingsApi.updateSettings(
+ index,
+ """{"index" : {"refresh_interval" : "-1", "number_of_replicas" : 0} }"""
+ )
+ }
+ indices.add(index)
+ }
+ push(out, elem)
+ }
+ )
+ setHandler(
+ out,
+ () => {
+ pull(in)
+ }
+ )
+ }
+ }
+ }
+
+ def docAsUpsert(doc: String): String = s"""{"doc":$doc,"doc_as_upsert":true}"""
+
+ object BulkErrorAnalyzer {
+
+ /** Determines whether a bulk error is retryable based on the status.
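+ *
+ * For example (derived from the cases below):
+ * {{{
+ * BulkErrorAnalyzer.isRetryable(429) // true  - Too Many Requests
+ * BulkErrorAnalyzer.isRetryable(400) // false - Bad Request
+ * BulkErrorAnalyzer.isRetryable(550) // true  - unlisted 5xx codes default to retryable
+ * }}}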
+ * + * @param statusCode + * HTTP error status code + * @return + * true if the error is retryable, false otherwise + */ + def isRetryable(statusCode: Int): Boolean = statusCode match { + // Temporary errors - retryable + case 429 => true // Too Many Requests + case 503 => true // Service Unavailable + case 504 => true // Gateway Timeout + case 408 => true // Request Timeout + case 502 => true // Bad Gateway + + // Permanent errors - not retryable + case 400 => false // Bad Request + case 401 => false // Unauthorized + case 403 => false // Forbidden + case 404 => false // Not Found + case 409 => false // Conflict (version) + case 413 => false // Payload Too Large + + // By default, 5xx errors (except those listed) are retryable. + case code if code >= 500 && code < 600 => true + + // Other 4xx errors are non-retryable + case code if code >= 400 && code < 500 => false + + // Status 2xx and 3xx should not be errors + case _ => false + } + + /** Determines whether a bulk error is retryable based on the ES error type. + * + * @param errorType + * Elasticsearch error type + * @return + * true if the error is retryable, false otherwise + */ + def isRetryableByType(errorType: String): Boolean = errorType match { + // Retryable errors + case "es_rejected_execution_exception" => true + case "circuit_breaking_exception" => true + case "timeout_exception" => true + case "unavailable_shards_exception" => true + + // non-retryable errors + case "mapper_parsing_exception" => false + case "illegal_argument_exception" => false + case "version_conflict_engine_exception" => false + case "document_missing_exception" => false + case "index_not_found_exception" => false + case "strict_dynamic_mapping_exception" => false + + // By default, it is considered non-retryable + case _ => false + } + } +} diff --git a/es7/rest/src/main/scala/app/softnetwork/elastic/persistence/query/RestHighLevelClientProvider.scala b/core/src/main/scala/app/softnetwork/elastic/client/metrics/AggregatedMetrics.scala similarity index 54% rename from es7/rest/src/main/scala/app/softnetwork/elastic/persistence/query/RestHighLevelClientProvider.scala rename to core/src/main/scala/app/softnetwork/elastic/client/metrics/AggregatedMetrics.scala index 7a93fcf9..a0f52302 100644 --- a/es7/rest/src/main/scala/app/softnetwork/elastic/persistence/query/RestHighLevelClientProvider.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/metrics/AggregatedMetrics.scala @@ -14,15 +14,19 @@ * limitations under the License. 
*/ -package app.softnetwork.elastic.persistence.query +package app.softnetwork.elastic.client.metrics -import app.softnetwork.elastic.client.rest.RestHighLevelClientApi -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.model.Timestamped - -trait RestHighLevelClientProvider[T <: Timestamped] - extends ElasticProvider[T] - with RestHighLevelClientApi { - _: ManifestWrapper[T] => +case class AggregatedMetrics( + totalOperations: Long, + successCount: Long, + failureCount: Long, + totalDuration: Long, + operationMetrics: Map[String, OperationMetrics], + indexMetrics: Map[String, OperationMetrics] +) { + def averageDuration: Double = + if (totalOperations > 0) totalDuration.toDouble / totalOperations else 0.0 + def successRate: Double = + if (totalOperations > 0) (successCount.toDouble / totalOperations) * 100 else 0.0 } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsApi.scala b/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsApi.scala new file mode 100644 index 00000000..520703ac --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsApi.scala @@ -0,0 +1,256 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.metrics + +import scala.language.implicitConversions + +trait MetricsApi { + + //format:off + /** Records an operation with its metrics. + * + * This method is called automatically by the decorator, but can also be used manually for custom + * operations. + * + * @param operation + * Operation name (e.g., "search", "index") + * @param duration + * Operation duration in milliseconds + * @param success + * Indicates if the operation succeeded + * @param index + * Elasticsearch index involved (optional) + * + * @example + * {{{ + * val client = ElasticClientFactory.createWithMetrics(config) + * + * // Manual recording + * val start = System.currentTimeMillis() + * try { + * // Custom operation + * customElasticsearchOperation() + * val duration = System.currentTimeMillis() - start + * client.recordOperation("customOp", duration, success = true, Some("myindex")) + * } catch { + * case e: Exception => + * val duration = System.currentTimeMillis() - start + * client.recordOperation("customOp", duration, success = false, Some("myindex")) + * } + * }}} + */ + //format:on + def recordOperation( + operation: String, + duration: Long, + success: Boolean, + index: Option[String] = None + ): Unit + + //format:off + /** Retrieves aggregated global metrics. 
+ * + * @return + * Global metrics for all operations + * + * @example + * {{{ + * val client = ElasticClientFactory.createWithMetrics(config) + * + * // Perform operations + * client.createIndex("test") + * client.index("test", "1", """{"data": "value"}""") + * + * // Retrieve metrics + * val metrics = client.getMetrics + * + * println(s""" + * |Total operations: ${metrics.totalOperations} + * |Successful: ${metrics.successCount} + * |Failed: ${metrics.failureCount} + * |Success rate: ${metrics.successRate}% + * |Failure rate: ${metrics.failureRate}% + * |Average duration: ${metrics.averageDuration}ms + * |Min duration: ${metrics.minDuration}ms + * |Max duration: ${metrics.maxDuration}ms + * |Last execution: ${new Date(metrics.lastExecutionTime)} + * """.stripMargin) + * }}} + */ + //format:on + def getMetrics: OperationMetrics + + //format:off + /** Retrieves metrics for a specific operation. + * + * @param operation + * Operation name + * @return + * Operation metrics, or None if no data + * + * @example + * {{{ + * val client = ElasticClientFactory.createWithMetrics(config) + * + * // Perform multiple searches + * (1 to 100).foreach { i => + * val query = ElasticQuery(indices = Seq("products")) + * client.search(query, Map.empty, Map.empty) + * } + * + * // Analyze search performance + * client.getMetricsByOperation("search").foreach { metrics => + * println(s""" + * |=== Search Performance === + * |Total searches: ${metrics.totalOperations} + * |Success rate: ${metrics.successRate}% + * |Average latency: ${metrics.averageDuration}ms + * |Fastest: ${metrics.minDuration}ms + * |Slowest: ${metrics.maxDuration}ms + * | + * |Performance grade: ${ + * if (metrics.averageDuration < 100) "Excellent" + * else if (metrics.averageDuration < 500) "Good" + * else if (metrics.averageDuration < 1000) "Average" + * else "Needs optimization" + * } + * """.stripMargin) + * } + * }}} + */ + //format:on + def getMetricsByOperation(operation: String): Option[OperationMetrics] + + //format:off + /** Retrieves metrics for a specific index. + * + * @param index + * Index name + * @return + * Index metrics, or None if no data + * + * @example + * {{{ + * val client = ElasticClientFactory.createWithMetrics(config) + * + * // Operations on different indexes + * client.index("products", "1", """{"name": "Product 1"}""") + * client.index("products", "2", """{"name": "Product 2"}""") + * client.index("orders", "1", """{"total": 100}""") + * + * // Metrics by index + * client.getMetricsByIndex("products").foreach { metrics => + * println(s""" + * |Products Index: + * | Operations: ${metrics.totalOperations} + * | Avg duration: ${metrics.averageDuration}ms + * """.stripMargin) + * } + * + * client.getMetricsByIndex("orders").foreach { metrics => + * println(s""" + * |Orders Index: + * | Operations: ${metrics.totalOperations} + * | Avg duration: ${metrics.averageDuration}ms + * """.stripMargin) + * } + * + * // Compare performance + * val productsPerf = client.getMetricsByIndex("products").map(_.averageDuration).getOrElse(0.0) + * val ordersPerf = client.getMetricsByIndex("orders").map(_.averageDuration).getOrElse(0.0) + * + * if (productsPerf > ordersPerf * 2) { + * println("⚠️ Products index is significantly slower than orders") + * } + * }}} + */ + //format:on + def getMetricsByIndex(index: String): Option[OperationMetrics] + + //format:off + /** Retrieves all aggregated metrics with details by operation and by index. 
+ * + * @return + * Complete metrics with breakdowns + * + * @example + * {{{ + * val client = ElasticClientFactory.createWithMetrics(config) + * + * // Perform various operations + * client.createIndex("test1") + * client.createIndex("test2") + * client.index("test1", "1", """{"data": 1}""") + * client.index("test2", "1", """{"data": 2}""") + * client.search(ElasticQuery(Seq("test1")), Map.empty, Map.empty) + * + * // Complete report + * val aggregated = client.getAggregatedMetrics + * + * println(s""" + * |=== Global Report === + * |Total operations: ${aggregated.totalOperations} + * |Success rate: ${aggregated.successRate}% + * |Average duration: ${aggregated.averageDuration}ms + * | + * |=== By Operation === + * |${aggregated.operationMetrics.map { case (op, m) => + * s"$op: ${m.totalOperations} ops, ${m.averageDuration}ms avg, ${m.successRate}% success" + * }.mkString("\n")} + * | + * |=== By Index === + * |${aggregated.indexMetrics.map { case (idx, m) => + * s"$idx: ${m.totalOperations} ops, ${m.averageDuration}ms avg" + * }.mkString("\n")} + * """.stripMargin) + * }}} + */ + //format:on + def getAggregatedMetrics: AggregatedMetrics + + //format:off + /** Resets all collected metrics. + * + * Useful for starting a new measurement period or after a test. + * + * @example + * {{{ + * val client = ElasticClientFactory.createWithMetrics(config) + * + * // Warmup phase + * (1 to 100).foreach { i => + * client.index("test", s"$i", s"""{"value": $i}""") + * } + * + * // Reset before real measurements + * client.resetMetrics() + * + * // Real measurements + * val start = System.currentTimeMillis() + * (1 to 1000).foreach { i => + * client.index("test", s"real_$i", s"""{"value": $i}""") + * } + * val end = System.currentTimeMillis() + * + * // Analyze clean metrics + * val metrics = client.getMetrics + * println(s"Pure indexing performance: ${metrics.averageDuration}ms per doc") + * }}} + */ + //format:on + def resetMetrics(): Unit +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsCollector.scala b/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsCollector.scala new file mode 100644 index 00000000..f48a7031 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsCollector.scala @@ -0,0 +1,161 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client.metrics + +import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.atomic.AtomicLong + +import scala.jdk.CollectionConverters._ + +class MetricsCollector extends MetricsApi { + + private val metrics = new ConcurrentHashMap[String, MetricAccumulator]() + private val indexMetrics = new ConcurrentHashMap[String, MetricAccumulator]() + + private class MetricAccumulator { + val totalOps = new AtomicLong(0) + val successOps = new AtomicLong(0) + val failureOps = new AtomicLong(0) + val totalDuration = new AtomicLong(0) + val minDuration = new AtomicLong(Long.MaxValue) + val maxDuration = new AtomicLong(Long.MinValue) + val lastExecution = new AtomicLong(0) + + /** Records an operation with its duration and success status. Thread-safe implementation using + * atomic operations. + * + * @param duration + * Operation duration in milliseconds + * @param success + * Whether the operation succeeded + */ + def record(duration: Long, success: Boolean): Unit = { + // Update counters + totalOps.incrementAndGet() + if (success) successOps.incrementAndGet() else failureOps.incrementAndGet() + totalDuration.addAndGet(duration) + lastExecution.set(System.currentTimeMillis()) + + // Update min duration using atomic operation + minDuration.updateAndGet(current => Math.min(current, duration)) + + // Update max duration using atomic operation + maxDuration.updateAndGet(current => Math.max(current, duration)) + } + + /** Converts accumulated metrics to an OperationMetrics object. + * + * @param operation + * The operation name + * @return + * OperationMetrics snapshot + */ + def toMetrics(operation: String): OperationMetrics = { + val min = minDuration.get() + val max = maxDuration.get() + + OperationMetrics( + operation = operation, + totalOperations = totalOps.get(), + successCount = successOps.get(), + failureCount = failureOps.get(), + totalDuration = totalDuration.get(), + minDuration = if (min == Long.MaxValue) 0 else min, + maxDuration = if (max == Long.MinValue) 0 else max, + lastExecutionTime = lastExecution.get() + ) + } + + /** Resets all metrics to initial values. Useful for testing or periodic metric resets. 
+ */ + def reset(): Unit = { + totalOps.set(0) + successOps.set(0) + failureOps.set(0) + totalDuration.set(0) + minDuration.set(Long.MaxValue) + maxDuration.set(Long.MinValue) + lastExecution.set(0) + } + } + + override def recordOperation( + operation: String, + duration: Long, + success: Boolean, + index: Option[String] = None + ): Unit = { + // Record operation metrics + val accumulator = metrics.computeIfAbsent(operation, _ => new MetricAccumulator()) + accumulator.record(duration, success) + + // Record index metrics if provided + index.foreach { idx => + val idxAccumulator = indexMetrics.computeIfAbsent(idx, _ => new MetricAccumulator()) + idxAccumulator.record(duration, success) + } + } + + override def getMetrics: OperationMetrics = { + val allMetrics = metrics.asScala.values.toSeq + + if (allMetrics.isEmpty) { + OperationMetrics("all", 0, 0, 0, 0, 0, 0, 0) + } else { + OperationMetrics( + operation = "all", + totalOperations = allMetrics.map(_.totalOps.get()).sum, + successCount = allMetrics.map(_.successOps.get()).sum, + failureCount = allMetrics.map(_.failureOps.get()).sum, + totalDuration = allMetrics.map(_.totalDuration.get()).sum, + minDuration = + allMetrics.map(_.minDuration.get()).filter(_ != Long.MaxValue).minOption.getOrElse(0), + maxDuration = allMetrics.map(_.maxDuration.get()).max, + lastExecutionTime = allMetrics.map(_.lastExecution.get()).max + ) + } + } + + override def getMetricsByOperation(operation: String): Option[OperationMetrics] = { + Option(metrics.get(operation)).map(_.toMetrics(operation)) + } + + override def getMetricsByIndex(index: String): Option[OperationMetrics] = { + Option(indexMetrics.get(index)).map(_.toMetrics(index)) + } + + override def getAggregatedMetrics: AggregatedMetrics = { + val globalMetrics = getMetrics + AggregatedMetrics( + totalOperations = globalMetrics.totalOperations, + successCount = globalMetrics.successCount, + failureCount = globalMetrics.failureCount, + totalDuration = globalMetrics.totalDuration, + operationMetrics = metrics.asScala.map { case (op, acc) => + op -> acc.toMetrics(op) + }.toMap, + indexMetrics = indexMetrics.asScala.map { case (idx, acc) => + idx -> acc.toMetrics(idx) + }.toMap + ) + } + + override def resetMetrics(): Unit = { + metrics.clear() + indexMetrics.clear() + } +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsConfig.scala b/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsConfig.scala new file mode 100644 index 00000000..cc89dc53 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsConfig.scala @@ -0,0 +1,45 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.metrics + +import app.softnetwork.elastic.client.monitoring.MonitoringConfig + +//format:off +/** Performance metrics configuration. 
+ * + * @param enabled + * Enables/disables metrics collection + * @param monitoring + * Automatic monitoring configuration + * @example + * {{{ + * val metricsConfig = MetricsConfig( + * enabled = true, + * monitoring = MonitoringConfig( + * enabled = true, + * interval = 1.minute, + * failureRateThreshold = 5.0, + * latencyThreshold = 500.0 + * ) + * ) + * }}} + */ +//format:on +case class MetricsConfig( + enabled: Boolean = true, + monitoring: MonitoringConfig = MonitoringConfig() +) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsElasticClient.scala b/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsElasticClient.scala new file mode 100644 index 00000000..914510c2 --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/metrics/MetricsElasticClient.scala @@ -0,0 +1,1030 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.metrics + +import akka.NotUsed +import akka.actor.ActorSystem +import akka.stream.scaladsl.Source +import app.softnetwork.elastic.client.{ + ElasticClientApi, + ElasticClientDelegator, + ElasticQueries, + ElasticQuery, + ElasticResponse, + SingleValueAggregateResult +} +import app.softnetwork.elastic.client.bulk._ +import app.softnetwork.elastic.client.result._ +import app.softnetwork.elastic.client.scroll._ +import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLQuery} +import org.json4s.Formats + +import scala.concurrent.{ExecutionContext, Future} +import scala.language.implicitConversions +import scala.reflect.ClassTag +import scala.util.{Failure, Success} + +/** Decorator that adds metrics to an existing Elasticsearch client. 
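+ *
+ * Every call is forwarded to the underlying client while its duration and outcome
+ * are recorded through the given collector. A minimal usage sketch (`underlying`
+ * stands for any concrete `ElasticClientApi` instance):
+ * {{{
+ * val collector = new MetricsCollector()
+ * val client = new MetricsElasticClient(underlying, collector)
+ * client.createIndex("docs")
+ * println(s"avg=${client.getMetrics.averageDuration}ms")
+ * }}}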
+ * + * @param delegate + * - The Elasticsearch client to decorate + * @param metricsCollector + * - The Metrics Collector + */ +class MetricsElasticClient( + val delegate: ElasticClientApi, + val metricsCollector: MetricsCollector +) extends ElasticClientDelegator + with MetricsApi { + + // Helper for measuring operations + private def measureAsync[T](operation: String, index: Option[String] = None)( + block: => Future[T] + )(implicit ec: ExecutionContext): Future[T] = { + val startTime = System.currentTimeMillis() + block.transform { + case Success(result) => + val duration = System.currentTimeMillis() - startTime + metricsCollector.recordOperation(operation, duration, success = true, index) + Success(result) + case Failure(ex) => + val duration = System.currentTimeMillis() - startTime + metricsCollector.recordOperation(operation, duration, success = false, index) + Failure(ex) + } + } + + private def measureResult[T](operation: String, index: Option[String] = None)( + block: => ElasticResult[T] + ): ElasticResult[T] = { + val startTime = System.currentTimeMillis() + val result = block + val duration = System.currentTimeMillis() - startTime + metricsCollector.recordOperation(operation, duration, success = result.isSuccess, index) + result + } + + // ==================== VersionApi ==================== + + override def version: ElasticResult[String] = + measureResult("version") { + delegate.version + } + + // ==================== IndicesApi ==================== + + override def createIndex(index: String, settings: String): ElasticResult[Boolean] = { + measureResult("createIndex", Some(index)) { + delegate.createIndex(index, settings) + } + } + + override def deleteIndex(index: String): ElasticResult[Boolean] = { + measureResult("deleteIndex", Some(index)) { + delegate.deleteIndex(index) + } + } + + override def closeIndex(index: String): ElasticResult[Boolean] = { + measureResult("closeIndex", Some(index)) { + delegate.closeIndex(index) + } + } + + override def openIndex(index: String): ElasticResult[Boolean] = { + measureResult("openIndex", Some(index)) { + delegate.openIndex(index) + } + } + + override def reindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = { + measureResult("reindex", Some(s"$sourceIndex->$targetIndex")) { + delegate.reindex(sourceIndex, targetIndex, refresh) + } + } + + override def indexExists(index: String): ElasticResult[Boolean] = { + measureResult("indexExists", Some(index)) { + delegate.indexExists(index) + } + } + + // ==================== AliasApi ==================== + + override def addAlias(index: String, alias: String): ElasticResult[Boolean] = { + measureResult("addAlias", Some(index)) { + delegate.addAlias(index, alias) + } + } + + override def removeAlias(index: String, alias: String): ElasticResult[Boolean] = { + measureResult("removeAlias", Some(index)) { + delegate.removeAlias(index, alias) + } + } + + /** Check if an alias exists. 
+ * + * @param alias + * the name of the alias to check + * @return + * ElasticSuccess(true) if it exists, ElasticSuccess(false) otherwise, ElasticFailure in case + * of error + * @example + * {{{ + * aliasExists("my-alias") match { + * case ElasticSuccess(true) => println("Alias exists") + * case ElasticSuccess(false) => println("Alias does not exist") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * }}} + */ + override def aliasExists(alias: String): ElasticResult[Boolean] = + measureResult("aliasExists") { + delegate.aliasExists(alias) + } + + /** Retrieve all aliases from an index. + * + * @param index + * the index name + * @return + * ElasticResult with the list of aliases + * @example + * {{{ + * getAliases("my-index") match { + * case ElasticSuccess(aliases) => println(s"Aliases: ${aliases.mkString(", ")}") + * case ElasticFailure(error) => println(s"Error: ${error.message}") + * } + * + * }}} + */ + override def getAliases(index: String): ElasticResult[Set[String]] = + measureResult("getAliases", Some(index)) { + delegate.getAliases(index) + } + + /** Atomic swap of an alias between two indexes. + * + * This operation is atomic: the alias is removed from oldIndex and added to newIndex in a single + * query, thus avoiding any period when the alias does not exist. This is the recommended + * operation for zero-downtime deployments. + * + * @param oldIndex + * the current index pointed to by the alias + * @param newIndex + * the new index that should point to the alias + * @param alias + * the name of the alias to swap + * @return + * ElasticSuccess(true) if swapped, ElasticFailure otherwise + * @example + * {{{ + * // Zero-downtime deployment + * swapAlias(oldIndex = "products-v1", newIndex = "products-v2", alias = "products") match { + * case ElasticSuccess(_) => println("✅ Alias swapped, new version deployed") + * case ElasticFailure(error) => println(s"❌ Error: ${error.message}") + * } + * }}} + * @note + * This operation is atomic and therefore preferable to removeAlias + addAlias + */ + override def swapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): ElasticResult[Boolean] = + measureResult("swapAlias", Some(s"$oldIndex->$newIndex")) { + delegate.swapAlias(oldIndex, newIndex, alias) + } + + // ==================== SettingsApi ==================== + + override def updateSettings(index: String, settings: String): ElasticResult[Boolean] = { + measureResult("updateSettings", Some(index)) { + delegate.updateSettings(index, settings) + } + } + + override def loadSettings(index: String): ElasticResult[String] = { + measureResult("loadSettings", Some(index)) { + delegate.loadSettings(index) + } + } + + /** Toggle the refresh interval of an index. + * + * @param index + * - the name of the index + * @param enable + * - true to enable the refresh interval, false to disable it + * @return + * true if the settings were updated successfully, false otherwise + */ + override def toggleRefresh(index: String, enable: Boolean): ElasticResult[Boolean] = + measureResult("toggleRefresh", Some(index)) { + delegate.toggleRefresh(index, enable) + } + + /** Set the number of replicas for an index. 
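+   *
+   * A common sketch is to drop replicas before a heavy bulk load and restore them
+   * once indexing is done:
+   * {{{
+   * setReplicas("my-index", 0) // faster bulk indexing, no replication
+   * // ... bulk load ...
+   * setReplicas("my-index", 2) // restore redundancy afterwards
+   * }}}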
+ * + * @param index + * - the name of the index + * @param replicas + * - the number of replicas to set + * @return + * true if the settings were updated successfully, false otherwise + */ + override def setReplicas(index: String, replicas: Int): ElasticResult[Boolean] = + measureResult("setReplicas", Some(index)) { + delegate.setReplicas(index, replicas) + } + + // ==================== MappingApi ==================== + + override def setMapping(index: String, mapping: String): ElasticResult[Boolean] = { + measureResult("setMapping", Some(index)) { + delegate.setMapping(index, mapping) + } + } + + override def getMapping(index: String): ElasticResult[String] = { + measureResult("getMapping", Some(index)) { + delegate.getMapping(index) + } + } + + /** Get the mapping properties of an index. + * + * @param index + * - the name of the index to get the mapping properties for + * @return + * the mapping properties of the index as a JSON string + */ + override def getMappingProperties(index: String): ElasticResult[String] = + measureResult("getMappingProperties", Some(index)) { + delegate.getMappingProperties(index) + } + + /** Check if the mapping of an index is different from the provided mapping. + * + * @param index + * - the name of the index to check + * @param mapping + * - the mapping to compare with the current mapping of the index + * @return + * true if the mapping is different, false otherwise + */ + override def shouldUpdateMapping(index: String, mapping: String): ElasticResult[Boolean] = + measureResult("shouldUpdateMapping", Some(index)) { + delegate.shouldUpdateMapping(index, mapping) + } + + /** Update the mapping of an index to a new mapping. + * + * This method handles three scenarios: + * 1. Index doesn't exist: Create it with the new mapping 2. Index exists but mapping is + * outdated: Migrate to new mapping 3. Index exists and mapping is current: Do nothing + * + * @param index + * - the name of the index to migrate + * @param mapping + * - the new mapping to set on the index + * @param settings + * - the settings to apply to the index (default is defaultSettings) + * @return + * true if the mapping was created or updated successfully, false otherwise + */ + override def updateMapping( + index: String, + mapping: String, + settings: String + ): ElasticResult[Boolean] = + measureResult("updateMapping", Some(index)) { + delegate.updateMapping(index, mapping, settings) + } + + // ==================== RefreshApi ==================== + + override def refresh(index: String): ElasticResult[Boolean] = { + measureResult("refresh", Some(index)) { + delegate.refresh(index) + } + } + + // ==================== FlushApi ==================== + + override def flush(index: String, force: Boolean, wait: Boolean): ElasticResult[Boolean] = { + measureResult("flush", Some(index)) { + delegate.flush(index, force, wait) + } + } + + // ==================== IndexApi ==================== + + override def index(index: String, id: String, source: String): ElasticResult[Boolean] = { + measureResult("index", Some(index)) { + delegate.index(index, id, source) + } + } + + override def indexAsync(index: String, id: String, source: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = { + measureAsync("indexAsync", Some(index)) { + delegate.indexAsync(index, id, source) + } + } + + /** Index an entity in the given index. 
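+   *
+   * A sketch, assuming a hypothetical `Product` case class and an implicit
+   * `Formats` in scope (the index name falls back to the entity type name):
+   * {{{
+   * case class Product(name: String)
+   * indexAs(Product("book"), id = "1")
+   * }}}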
+ * + * @param entity + * - the entity to index + * @param id + * - the id of the entity to index + * @param index + * - the name of the index to index the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * true if the entity was indexed successfully, false otherwise + */ + override def indexAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String], + maybeType: Option[String] + )(implicit u: ClassTag[U], formats: Formats): ElasticResult[Boolean] = + measureResult("indexAs", index) { + delegate.indexAs(entity, id, index, maybeType) + } + + /** Index an entity in the given index asynchronously. + * + * @param entity + * - the entity to index + * @param id + * - the id of the entity to index + * @param index + * - the name of the index to index the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * a Future that completes with true if the entity was indexed successfully, false otherwise + */ + override def indexAsyncAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String], + maybeType: Option[String] + )(implicit + u: ClassTag[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Boolean]] = + measureAsync("indexAsyncAs", index) { + delegate.indexAsyncAs(entity, id, index, maybeType) + } + + // ==================== UpdateApi ==================== + + override def update( + index: String, + id: String, + source: String, + upsert: Boolean + ): ElasticResult[Boolean] = { + measureResult("update", Some(index)) { + delegate.update(index, id, source, upsert) + } + } + + override def updateAsync(index: String, id: String, source: String, upsert: Boolean)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = { + measureAsync("updateAsync", Some(index)) { + delegate.updateAsync(index, id, source, upsert) + } + } + + /** Update an entity in the given index. + * + * @param entity + * - the entity to update + * @param id + * - the id of the entity to update + * @param index + * - the name of the index to update the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @param upsert + * - true to upsert the entity if it does not exist, false otherwise + * @return + * true if the entity was updated successfully, false otherwise + */ + override def updateAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String], + maybeType: Option[String], + upsert: Boolean + )(implicit u: ClassTag[U], formats: Formats): ElasticResult[Boolean] = + measureResult("updateAs", index) { + delegate.updateAs(entity, id, index, maybeType, upsert) + } + + /** Update an entity in the given index asynchronously. 
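+   *
+   * A sketch, mirroring `updateAs` but non-blocking (usual implicits assumed,
+   * reusing the hypothetical `Product` case class from above):
+   * {{{
+   * updateAsyncAs(Product("book"), id = "1", upsert = true)
+   * }}}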
+ * + * @param entity + * - the entity to update + * @param id + * - the id of the entity to update + * @param index + * - the name of the index to update the entity in (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @param upsert + * - true to upsert the entity if it does not exist, false otherwise + * @return + * a Future that completes with true if the entity was updated successfully, false otherwise + */ + override def updateAsyncAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String], + maybeType: Option[String], + upsert: Boolean + )(implicit + u: ClassTag[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Boolean]] = + measureAsync("updateAsyncAs", index) { + delegate.updateAsyncAs(entity, id, index, maybeType, upsert) + } + + // ==================== DeleteApi ==================== + + override def delete(id: String, index: String): ElasticResult[Boolean] = { + measureResult("delete", Some(index)) { + delegate.delete(id, index) + } + } + + override def deleteAsync(id: String, index: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = { + measureAsync("deleteAsync", Some(index)) { + delegate.deleteAsync(id, index) + } + } + + // ==================== GetApi ==================== + + /** Get a document by its id from the given index. + * + * @param id + * - the id of the document to get + * @param index + * - the name of the index to get the document from + * @return + * an Option containing the document as a JSON string if it was found, None otherwise + */ + override def get(id: String, index: String): ElasticResult[Option[String]] = + measureResult("get", Some(index)) { + delegate.get(id, index) + } + + override def getAs[U <: AnyRef]( + id: String, + index: Option[String], + maybeType: Option[String] + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Option[U]] = { + measureResult("get", index) { + delegate.getAs[U](id, index, maybeType) + } + } + + /** Get a document by its id from the given index asynchronously. + * + * @param id + * - the id of the document to get + * @param index + * - the name of the index to get the document from + * @return + * a Future that completes with an Option containing the document as a JSON string if it was + * found, None otherwise + */ + override def getAsync(id: String, index: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[String]]] = + measureAsync("getAsync", Some(index)) { + delegate.getAsync(id, index) + } + + /** Get an entity by its id from the given index asynchronously. 
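+   *
+   * A sketch, assuming implicit `Manifest`, `ExecutionContext` and `Formats` in
+   * scope and default index resolution:
+   * {{{
+   * getAsyncAs[Product](id = "1").map {
+   *   case ElasticSuccess(Some(p)) => println(s"found $p")
+   *   case ElasticSuccess(None)    => println("not found")
+   *   case ElasticFailure(err)     => println(err.message)
+   * }
+   * }}}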
+ * + * @param id + * - the id of the entity to get + * @param index + * - the name of the index to get the entity from (default is the entity type name) + * @param maybeType + * - the type of the entity (default is the entity class name in lowercase) + * @return + * a Future that completes with an Option containing the entity if it was found, None otherwise + */ + override def getAsyncAs[U <: AnyRef]( + id: String, + index: Option[String], + maybeType: Option[String] + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Option[U]]] = + measureAsync("getAsyncAs", index) { + delegate.getAsyncAs[U](id, index, maybeType) + } + + // ==================== CountApi ==================== + + override def count(query: ElasticQuery): ElasticResult[Option[Double]] = { + measureResult("count", Some(query.indices.mkString(","))) { + delegate.count(query) + } + } + + /** Count the number of documents matching the given JSON query asynchronously. + * + * @param query + * - the query to count the documents for + * @return + * the number of documents matching the query, or None if the count could not be determined + */ + override def countAsync( + query: ElasticQuery + )(implicit ec: ExecutionContext): Future[ElasticResult[Option[Double]]] = + measureAsync("countAsync", Some(query.indices.mkString(","))) { + delegate.countAsync(query) + } + + // ==================== AggregateApi ================= + + /** Aggregate the results of the given SQL query. + * + * @param sqlQuery + * - the query to aggregate the results for + * @return + * a sequence of aggregated results + */ + override def aggregate(sqlQuery: SQLQuery)(implicit + ec: ExecutionContext + ): Future[ElasticResult[collection.Seq[SingleValueAggregateResult]]] = + measureAsync("aggregate") { + delegate.aggregate(sqlQuery) + } + + // ==================== SearchApi ==================== + + /** Search for documents / aggregations matching the SQL query. + * + * @param sql + * the SQL query to execute + * @return + * the Elasticsearch response + */ + override def search(sql: SQLQuery): ElasticResult[ElasticResponse] = + measureResult("search") { + delegate.search(sql) + } + + /** Asynchronous search for documents / aggregations matching the SQL query. + * + * @param sqlQuery + * the SQL query + * @return + * a Future containing the Elasticsearch response + */ + override def searchAsync( + sqlQuery: SQLQuery + )(implicit ec: ExecutionContext): Future[ElasticResult[ElasticResponse]] = + measureAsync("searchAsync") { + delegate.searchAsync(sqlQuery) + } + + /** Searches and converts results into typed entities from an SQL query. + * + * @param sqlQuery + * the SQL query containing fieldAliases and aggregations + * @tparam U + * the type of entities to return + * @return + * the entities matching the query + */ + override def searchAs[U]( + sqlQuery: SQLQuery + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] = + measureResult("searchAs") { + delegate.searchAs[U](sqlQuery) + } + + /** Asynchronous search with conversion to typed entities. 
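+   *
+   * A sketch, given a previously built `sqlQuery`, a `logger` and the usual
+   * implicits in scope:
+   * {{{
+   * searchAsyncAs[Product](sqlQuery).map {
+   *   case ElasticSuccess(hits) => hits.foreach(println)
+   *   case ElasticFailure(err)  => err.log(logger)
+   * }
+   * }}}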
+ * + * @param sqlQuery + * the SQL query + * @tparam U + * the type of entities to return + * @return + * a Future containing the entities + */ + override def searchAsyncAs[U](sqlQuery: SQLQuery)(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Seq[U]]] = + measureAsync("searchAsyncAs") { + delegate.searchAsyncAs[U](sqlQuery) + } + + override def singleSearch( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): ElasticResult[ElasticResponse] = { + measureResult("search", Some(elasticQuery.indices.mkString(","))) { + delegate.singleSearch(elasticQuery, fieldAliases, aggregations) + } + } + + override def multiSearch( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): ElasticResult[ElasticResponse] = { + measureResult("multisearch") { + delegate.multiSearch(elasticQueries, fieldAliases, aggregations) + } + } + + /** Asynchronous search for documents / aggregations matching the Elasticsearch query. + * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @return + * a Future containing the Elasticsearch response + */ + override def singleSearchAsync( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit ec: ExecutionContext): Future[ElasticResult[ElasticResponse]] = + measureAsync("searchAsync", Some(elasticQuery.indices.mkString(","))) { + delegate.singleSearchAsync(elasticQuery, fieldAliases, aggregations).asInstanceOf + } + + /** Asynchronous multi-search with Elasticsearch queries. + * + * @param elasticQueries + * the Elasticsearch queries + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @return + * a Future containing the combined Elasticsearch response + */ + override def multiSearchAsync( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit ec: ExecutionContext): Future[ElasticResult[ElasticResponse]] = + measureAsync("multisearchAsync") { + delegate.multiSearchAsync(elasticQueries, fieldAliases, aggregations).asInstanceOf + } + + /** Searches and converts results into typed entities. + * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * the entities matching the query + */ + override def singleSearchAs[U]( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] = + measureResult("searchAs", Some(elasticQuery.indices.mkString(","))) { + delegate.singleSearchAs(elasticQuery, fieldAliases, aggregations) + } + + /** Multi-search with conversion to typed entities. 
+ * + * @param elasticQueries + * the Elasticsearch queries + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * the entities matching the queries + */ + override def multisearchAs[U]( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] = + measureResult("multisearchAs") { + delegate.multisearchAs(elasticQueries, fieldAliases, aggregations) + } + + /** Asynchronous search with conversion to typed entities. + * + * @param elasticQuery + * the Elasticsearch query + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * a Future containing the entities + */ + override def singleSearchAsyncAs[U]( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Seq[U]]] = + measureAsync("searchAsyncAs", Some(elasticQuery.indices.mkString(","))) { + delegate.singleSearchAsyncAs(elasticQuery, fieldAliases, aggregations) + } + + /** Asynchronous multi-search with conversion to typed entities. + * + * @param elasticQueries + * the Elasticsearch queries + * @param fieldAliases + * the field aliases + * @param aggregations + * the SQL aggregations + * @tparam U + * the type of entities to return + * @return + * a Future containing the entities + */ + override def multiSearchAsyncAs[U]( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Seq[U]]] = + measureAsync("multisearchAsyncAs") { + delegate.multiSearchAsyncAs(elasticQueries, fieldAliases, aggregations) + } + + override def searchWithInnerHits[U: Manifest: ClassTag, I: Manifest: ClassTag]( + sql: SQLQuery, + innerField: String + )(implicit + formats: Formats + ): ElasticResult[Seq[(U, Seq[I])]] = + measureResult("searchWithInnerHits") { + delegate.searchWithInnerHits[U, I](sql, innerField) + } + + override def singleSearchWithInnerHits[U: Manifest: ClassTag, I: Manifest: ClassTag]( + elasticQuery: ElasticQuery, + innerField: String + )(implicit + formats: Formats + ): ElasticResult[Seq[(U, Seq[I])]] = + measureResult("searchWithInnerHits", Some(elasticQuery.indices.mkString(","))) { + delegate.singleSearchWithInnerHits[U, I](elasticQuery, innerField) + } + + override def multisearchWithInnerHits[U: Manifest: ClassTag, I: Manifest: ClassTag]( + elasticQueries: ElasticQueries, + innerField: String + )(implicit + formats: Formats + ): ElasticResult[Seq[(U, Seq[I])]] = + measureResult("multisearchWithInnerHits") { + delegate.multisearchWithInnerHits[U, I](elasticQueries, innerField) + } + + // ==================== ScrollApi ==================== + + /** Create a scrolling source with automatic strategy selection + */ + override def scroll(sql: SQLQuery, config: ScrollConfig)(implicit + system: ActorSystem + ): Source[(Map[String, Any], ScrollMetrics), NotUsed] = { + // Note: For streams, we measure at the beginning but not every element + val startTime = System.currentTimeMillis() + val source = delegate.scroll(sql, config) + + source.watchTermination() { (_, done) => + done.onComplete { result => + val 
duration = System.currentTimeMillis() - startTime + val success = result.isSuccess + metricsCollector.recordOperation( + "scroll", + duration, + success + ) + }(system.dispatcher) + NotUsed + } + + } + + /** Typed scroll source + */ + override def scrollAs[T](sql: SQLQuery, config: ScrollConfig)(implicit + system: ActorSystem, + m: Manifest[T], + formats: Formats + ): Source[(T, ScrollMetrics), NotUsed] = { + // Note: For streams, we measure at the beginning but not every element + val startTime = System.currentTimeMillis() + val source = delegate.scrollAs[T](sql, config) + + source.watchTermination() { (_, done) => + done.onComplete { result => + val duration = System.currentTimeMillis() - startTime + val success = result.isSuccess + metricsCollector.recordOperation( + "scrollAs", + duration, + success + ) + }(system.dispatcher) + NotUsed + } + } + + // ==================== BulkApi ==================== + + override def bulkWithResult[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String] = None, + idKey: Option[String] = None, + suffixDateKey: Option[String] = None, + suffixDatePattern: Option[String] = None, + update: Option[Boolean] = None, + delete: Option[Boolean] = None, + parentIdKey: Option[String] = None, + callbacks: BulkCallbacks = BulkCallbacks.default + )(implicit bulkOptions: BulkOptions, system: ActorSystem): Future[BulkResult] = { + implicit val ec: ExecutionContext = system.dispatcher + measureAsync("bulkWithResult", Some(bulkOptions.defaultIndex)) { + delegate.bulkWithResult( + items, + toDocument, + indexKey, + idKey, + suffixDateKey, + suffixDatePattern, + update, + delete, + parentIdKey, + callbacks + ) + } + } + + override def bulkSource[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String] = None, + idKey: Option[String] = None, + suffixDateKey: Option[String] = None, + suffixDatePattern: Option[String] = None, + update: Option[Boolean] = None, + delete: Option[Boolean] = None, + parentIdKey: Option[String] = None + )(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Source[Either[FailedDocument, SuccessfulDocument], NotUsed] = { + val startTime = System.currentTimeMillis() + val source = delegate.bulkSource( + items, + toDocument, + indexKey, + idKey, + suffixDateKey, + suffixDatePattern, + update, + delete, + parentIdKey + ) + source.watchTermination() { (_, done) => + done.onComplete { result => + val duration = System.currentTimeMillis() - startTime + val success = result.isSuccess + metricsCollector.recordOperation( + "bulkSource", + duration, + success, + Some(bulkOptions.defaultIndex) + ) + }(system.dispatcher) + NotUsed + } + } + + override def bulk[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String] = None, + idKey: Option[String] = None, + suffixDateKey: Option[String] = None, + suffixDatePattern: Option[String] = None, + update: Option[Boolean] = None, + delete: Option[Boolean] = None, + parentIdKey: Option[String] = None + )(implicit bulkOptions: BulkOptions, system: ActorSystem): ElasticResult[BulkResult] = { + measureResult("bulk", Some(bulkOptions.defaultIndex)) { + delegate.bulk( + items, + toDocument, + indexKey, + idKey, + suffixDateKey, + suffixDatePattern, + update, + delete, + parentIdKey + ) + } + } + + // ==================== MetricsApi (délégation) ==================== + + override def recordOperation( + operation: String, + duration: Long, + success: Boolean, + index: Option[String] + ): Unit = { + metricsCollector.recordOperation(operation, duration, 
success, index) + } + + override def getMetrics: OperationMetrics = metricsCollector.getMetrics + + override def getMetricsByOperation(operation: String): Option[OperationMetrics] = { + metricsCollector.getMetricsByOperation(operation) + } + + override def getMetricsByIndex(index: String): Option[OperationMetrics] = { + metricsCollector.getMetricsByIndex(index) + } + + override def getAggregatedMetrics: AggregatedMetrics = { + metricsCollector.getAggregatedMetrics + } + + override def resetMetrics(): Unit = { + metricsCollector.resetMetrics() + } +} diff --git a/es6/rest/src/main/scala/app/softnetwork/elastic/persistence/query/RestHighLevelClientProvider.scala b/core/src/main/scala/app/softnetwork/elastic/client/metrics/OperationMetrics.scala similarity index 52% rename from es6/rest/src/main/scala/app/softnetwork/elastic/persistence/query/RestHighLevelClientProvider.scala rename to core/src/main/scala/app/softnetwork/elastic/client/metrics/OperationMetrics.scala index 7a93fcf9..5cbe4ea1 100644 --- a/es6/rest/src/main/scala/app/softnetwork/elastic/persistence/query/RestHighLevelClientProvider.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/metrics/OperationMetrics.scala @@ -14,15 +14,23 @@ * limitations under the License. */ -package app.softnetwork.elastic.persistence.query +package app.softnetwork.elastic.client.metrics -import app.softnetwork.elastic.client.rest.RestHighLevelClientApi -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.model.Timestamped +case class OperationMetrics( + operation: String, + totalOperations: Long, + successCount: Long, + failureCount: Long, + totalDuration: Long, + minDuration: Long, + maxDuration: Long, + lastExecutionTime: Long +) { + def averageDuration: Double = + if (totalOperations > 0) totalDuration.toDouble / totalOperations else 0.0 -trait RestHighLevelClientProvider[T <: Timestamped] - extends ElasticProvider[T] - with RestHighLevelClientApi { - _: ManifestWrapper[T] => + def successRate: Double = + if (totalOperations > 0) (successCount.toDouble / totalOperations) * 100 else 0.0 + def failureRate: Double = 100.0 - successRate } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/monitoring/MonitoredElasticClient.scala b/core/src/main/scala/app/softnetwork/elastic/client/monitoring/MonitoredElasticClient.scala new file mode 100644 index 00000000..6adc5dde --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/monitoring/MonitoredElasticClient.scala @@ -0,0 +1,122 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package app.softnetwork.elastic.client.monitoring
+
+import akka.actor.{ActorSystem, Cancellable}
+import app.softnetwork.elastic.client.metrics.{MetricsCollector, MetricsElasticClient}
+import app.softnetwork.elastic.client.ElasticClientApi
+
+import java.util.concurrent.atomic.AtomicBoolean
+import scala.concurrent.duration._
+import scala.util.control.NonFatal
+
+class MonitoredElasticClient(
+  delegate: ElasticClientApi,
+  metricsCollector: MetricsCollector,
+  monitoringConfig: MonitoringConfig
+)(implicit system: ActorSystem = ActorSystem("monitoring-system"))
+    extends MetricsElasticClient(delegate, metricsCollector)
+    with AutoCloseable {
+
+  import system.dispatcher
+
+  private val isShutdown = new AtomicBoolean(false)
+
+  // Shutdown hook as a safety net
+  sys.addShutdownHook {
+    if (!isShutdown.get()) {
+      logger.warn("JVM shutdown detected, forcing client shutdown")
+      shutdown()
+    }
+  }
+
+  private val cancellable: Cancellable = system.scheduler.scheduleAtFixedRate(
+    FiniteDuration(monitoringConfig.interval.toSeconds, SECONDS),
+    FiniteDuration(monitoringConfig.interval.toSeconds, SECONDS)
+  ) { () =>
+    logMetrics()
+    checkAlerts()
+  }
+
+  private def logMetrics(): Unit = {
+    val metrics = getMetrics
+    logger.info(
+      s"""
+         |=== Elasticsearch Metrics ===
+         |Total Operations: ${metrics.totalOperations}
+         |Success Rate: ${metrics.successRate}%
+         |Failure Rate: ${metrics.failureRate}%
+         |Average Duration: ${metrics.averageDuration}ms
+         |Min Duration: ${metrics.minDuration}ms
+         |Max Duration: ${metrics.maxDuration}ms
+         |=============================
+      """.stripMargin
+    )
+
+    // Per-operation logging
+    val aggregated = getAggregatedMetrics
+    aggregated.operationMetrics.foreach { case (op, m) =>
+      if (m.totalOperations > 0) {
+        logger.debug(
+          s"[$op] ops=${m.totalOperations}, success=${m.successRate}%, avg=${m.averageDuration}ms"
+        )
+      }
+    }
+  }
+
+  private def checkAlerts(): Unit = {
+    val metrics = getMetrics
+
+    // Alert on a high failure rate
+    if (metrics.failureRate > monitoringConfig.failureRateThreshold) {
+      logger.warn(s"⚠️ HIGH FAILURE RATE: ${metrics.failureRate}%")
+    }
+
+    // Alert on high latency
+    if (metrics.averageDuration > monitoringConfig.latencyThreshold) {
+      logger.warn(s"⚠️ HIGH LATENCY: ${metrics.averageDuration}ms")
+    }
+
+    // Per-operation alerts
+    val aggregated = getAggregatedMetrics
+    aggregated.operationMetrics.foreach { case (op, m) =>
+      if (m.failureRate > monitoringConfig.failureRateThreshold) {
+        logger.warn(s"⚠️ HIGH FAILURE RATE for [$op]: ${m.failureRate}%")
+      }
+      if (m.averageDuration > monitoringConfig.latencyThreshold) {
+        logger.warn(s"⚠️ HIGH LATENCY for [$op]: ${m.averageDuration}ms")
+      }
+    }
+  }
+
+  def shutdown(): Unit = {
+    if (!isShutdown.getAndSet(true)) {
+      logger.info("Shutting down MonitoredElasticClient")
+      try {
+        logMetrics()
+        cancellable.cancel()
+        logger.info("MonitoredElasticClient shut down successfully")
+      } catch {
+        case NonFatal(ex) =>
+          logger.error(s"Error during shutdown: ${ex.getMessage}", ex)
+          throw ex
+      }
+    }
+  }
+
+  override def close(): Unit = shutdown()
+}
diff --git a/core/src/main/scala/app/softnetwork/elastic/client/monitoring/MonitoringConfig.scala b/core/src/main/scala/app/softnetwork/elastic/client/monitoring/MonitoringConfig.scala
new file mode 100644
index 00000000..e23e09ce
--- /dev/null
+++ b/core/src/main/scala/app/softnetwork/elastic/client/monitoring/MonitoringConfig.scala
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2025 SOFTNETWORK
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.monitoring + +import scala.concurrent.duration._ + +//format:off +/** Automatic monitoring configuration. + * + * @param enabled + * Enables/disables automatic monitoring + * @param interval + * Interval between metric reports + * @param failureRateThreshold + * Alert threshold for failure rate (%) + * @param latencyThreshold + * Alert threshold for average latency (ms) + * + * @example + * {{{ + * val monitoring = MonitoringConfig( + * enabled = true, + * interval = 30.seconds, + * failureRateThreshold = 10.0, // Alert if > 10% failures + * latencyThreshold = 1000.0 // Alert if > 1000ms + * ) + * }}} + */ +//format:on +case class MonitoringConfig( + enabled: Boolean = true, + interval: Duration = 30.seconds, + failureRateThreshold: Double = 10.0, + latencyThreshold: Double = 1000.0 +) diff --git a/core/src/main/scala/app/softnetwork/elastic/client/monitoring/PrometheusExporter.scala b/core/src/main/scala/app/softnetwork/elastic/client/monitoring/PrometheusExporter.scala new file mode 100644 index 00000000..737b9d7b --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/monitoring/PrometheusExporter.scala @@ -0,0 +1,52 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client.monitoring + +import app.softnetwork.elastic.client.metrics.MetricsElasticClient + +trait PrometheusExporter { self: MetricsElasticClient => + + def exportPrometheusMetrics: String = { + val aggregated = getAggregatedMetrics + val sb = new StringBuilder() + + // Global metrics + sb.append(s"elasticsearch_operations_total ${aggregated.totalOperations}\n") + sb.append(s"elasticsearch_operations_success ${aggregated.successCount}\n") + sb.append(s"elasticsearch_operations_failure ${aggregated.failureCount}\n") + sb.append(s"elasticsearch_operations_duration_ms ${aggregated.totalDuration}\n") + + // Per-operation metrics + aggregated.operationMetrics.foreach { case (op, m) => + sb.append(s"""elasticsearch_operation_total{operation="$op"} ${m.totalOperations}""" + "\n") + sb.append( + s"""elasticsearch_operation_duration_ms{operation="$op"} ${m.totalDuration}""" + "\n" + ) + sb.append( + s"""elasticsearch_operation_success_rate{operation="$op"} ${m.successRate}""" + "\n" + ) + } + + // Per-index metrics + aggregated.indexMetrics.foreach { case (idx, m) => + sb.append(s"""elasticsearch_index_operations{index="$idx"} ${m.totalOperations}""" + "\n") + sb.append(s"""elasticsearch_index_duration_ms{index="$idx"} ${m.totalDuration}""" + "\n") + } + + sb.toString() + } +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/package.scala index c6174aa7..412ca2ac 100644 --- a/core/src/main/scala/app/softnetwork/elastic/client/package.scala +++ b/core/src/main/scala/app/softnetwork/elastic/client/package.scala @@ -16,25 +16,44 @@ package app.softnetwork.elastic -import akka.stream.{Attributes, FlowShape, Inlet, Outlet} -import akka.stream.stage.{GraphStage, GraphStageLogic} -import app.softnetwork.elastic.client.BulkAction.BulkAction -import app.softnetwork.serialization._ -import com.google.gson.{Gson, JsonElement, JsonObject} -import org.json4s.Formats +import akka.actor.ActorSystem +import app.softnetwork.elastic.sql.function.aggregate._ +import app.softnetwork.elastic.sql.query.SQLAggregation import org.slf4j.Logger -import scala.collection.immutable.Seq -import scala.collection.mutable -import scala.language.reflectiveCalls -import scala.util.{Failure, Success, Try} - -//import scala.jdk.CollectionConverters._ -import scala.collection.JavaConverters._ +import java.util.concurrent.TimeUnit +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration._ +import scala.language.{implicitConversions, reflectiveCalls} /** Created by smanciot on 30/06/2018. 
*/ -package object client { +package object client extends SerializationApi { + + /** Type alias for JSON query + */ + type JSONQuery = String + + /** Type alias for JSON results + */ + type JSONResults = String + + /** Elastic response case class + * @param query + * - the JSON query + * @param results + * - the JSON results + * @param fieldAliases + * - the field aliases used + * @param aggregations + * - the aggregations expected + */ + case class ElasticResponse( + query: JSONQuery, + results: JSONResults, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] + ) case class ElasticCredentials( url: String = "http://localhost:9200", @@ -42,135 +61,100 @@ package object client { password: String = "" ) - object BulkAction extends Enumeration { - type BulkAction = Value - val INDEX: client.BulkAction.Value = Value(0, "INDEX") - val UPDATE: client.BulkAction.Value = Value(1, "UPDATE") - val DELETE: client.BulkAction.Value = Value(2, "DELETE") - } - - case class BulkItem( - index: String, - action: BulkAction, - body: String, - id: Option[String], - parent: Option[String] - ) - - case class BulkOptions( - index: String, - documentType: String = "_doc", - maxBulkSize: Int = 100, - balance: Int = 1, - disableRefresh: Boolean = false + /** Elastic query wrapper + * @param query + * - the elasticsearch JSON query + * @param indices + * - the target indices + * @param types + * - the target types @deprecated types are deprecated in ES 7+ + */ + case class ElasticQuery(query: JSONQuery, indices: Seq[String], types: Seq[String] = Seq.empty) + + case class ElasticQueries(queries: List[ElasticQuery]) + + /** Retry configuration + */ + case class RetryConfig( + maxRetries: Int = 3, + initialDelay: FiniteDuration = 1.second, + maxDelay: FiniteDuration = 10.seconds, + backoffFactor: Double = 2.0 ) - trait BulkElasticAction { def index: String } - - trait BulkElasticResult { def items: List[BulkElasticResultItem] } - - trait BulkElasticResultItem { def index: String } - - case class BulkSettings[A](disableRefresh: Boolean = false)(implicit - settingsApi: SettingsApi, - toBulkElasticAction: A => BulkElasticAction - ) extends GraphStage[FlowShape[A, A]] { - - val in: Inlet[A] = Inlet[A]("Filter.in") - val out: Outlet[A] = Outlet[A]("Filter.out") - - val shape: FlowShape[A, A] = FlowShape.of(in, out) - - val indices = mutable.Set.empty[String] - - override def createLogic(inheritedAttributes: Attributes): GraphStageLogic = { - new GraphStageLogic(shape) { - setHandler( - in, - () => { - val elem = grab(in) - val index = elem.index - if (!indices.contains(index)) { - if (disableRefresh) { - settingsApi.updateSettings( - index, - """{"index" : {"refresh_interval" : "-1", "number_of_replicas" : 0} }""" - ) - } - indices.add(index) - } - push(out, elem) + /** Retry logic with exponential backoff + */ + // Passer le scheduler en paramètre implicite + private[client] def retryWithBackoff[T](config: RetryConfig)( + operation: => Future[T] + )(implicit + system: ActorSystem, + logger: Logger + ): Future[T] = { + implicit val ec: ExecutionContext = system.dispatcher + val scheduler = system.scheduler + def attempt(retriesLeft: Int, delay: FiniteDuration): Future[T] = { + operation.recoverWith { + case ex if retriesLeft > 0 && isRetriableError(ex) => + logger.warn(s"Retrying after failure ($retriesLeft retries left): ${ex.getMessage}") + akka.pattern.after(delay, scheduler) { + val nextDelay = FiniteDuration( + (delay * config.backoffFactor).min(config.maxDelay).toMillis, + 
TimeUnit.MILLISECONDS + ) + attempt(retriesLeft - 1, nextDelay) } - ) - setHandler( - out, - () => { - pull(in) - } - ) } } + attempt(config.maxRetries, config.initialDelay) } - def docAsUpsert(doc: String): String = s"""{"doc":$doc,"doc_as_upsert":true}""" - - implicit class InnerHits(searchResult: JsonObject) { - def ~>[M, I]( - innerField: String - )(implicit formats: Formats, m: Manifest[M], i: Manifest[I]): List[(M, List[I])] = { - def innerHits(result: JsonElement) = { - result.getAsJsonObject - .get("inner_hits") - .getAsJsonObject - .get(innerField) - .getAsJsonObject - .get("hits") - .getAsJsonObject - .get("hits") - .getAsJsonArray - .iterator() - } - val gson = new Gson() - val results = searchResult.get("hits").getAsJsonObject.get("hits").getAsJsonArray.iterator() - (for (result <- results.asScala) - yield ( - result match { - case obj: JsonObject => - Try { - serialization.read[M](gson.toJson(obj.get("_source"))) - } match { - case Success(s) => s - case Failure(f) => - throw f - } - case _ => serialization.read[M](result.getAsString) - }, - (for (innerHit <- innerHits(result).asScala) yield innerHit match { - case obj: JsonObject => - Try { - serialization.read[I](gson.toJson(obj.get("_source"))) - } match { - case Success(s) => s - case Failure(f) => - throw f - } - case _ => serialization.read[I](innerHit.getAsString) - }).toList - )).toList - } + /** Determine if an error is retriable + */ + private[client] def isRetriableError(ex: Throwable): Boolean = ex match { + case _: java.net.SocketTimeoutException => true + case _: java.io.IOException => true + // case _: TransportException => true + case _ => false } - case class JSONQuery(query: String, indices: Seq[String], types: Seq[String] = Seq.empty) + /** Aggregation types + */ + object AggregationType extends Enumeration { + type AggregationType = Value + val Count, Min, Max, Avg, Sum, FirstValue, LastValue, ArrayAgg = Value + } - case class JSONQueries(queries: List[JSONQuery]) + /** Client Aggregation + * @param aggName + * - the name of the aggregation + * @param aggType + * - the type of the aggregation + * @param distinct + * - when the aggregation is multivalued define if its values should be returned distinct or + * not + */ + case class ClientAggregation( + aggName: String, + aggType: AggregationType.AggregationType, + distinct: Boolean + ) { + def multivalued: Boolean = aggType == AggregationType.ArrayAgg + def singleValued: Boolean = !multivalued + } - def tryOrElse[T](block: => T, default: => T)(implicit logger: Logger): T = { - try { - block - } catch { - case e: Exception => - logger.error("An error occurred while executing the block", e) - default + implicit def sqlAggregationToClientAggregation(agg: SQLAggregation): ClientAggregation = { + val aggType = agg.aggType match { + case COUNT => AggregationType.Count + case MIN => AggregationType.Min + case MAX => AggregationType.Max + case AVG => AggregationType.Avg + case SUM => AggregationType.Sum + case _: FirstValue => AggregationType.FirstValue + case _: LastValue => AggregationType.LastValue + case _: ArrayAgg => AggregationType.ArrayAgg + case _ => throw new IllegalArgumentException(s"Unsupported aggregation type: ${agg.aggType}") } + ClientAggregation(agg.aggName, aggType, agg.distinct) } } diff --git a/core/src/main/scala/app/softnetwork/elastic/client/result/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/result/package.scala new file mode 100644 index 00000000..23861a22 --- /dev/null +++ 
b/core/src/main/scala/app/softnetwork/elastic/client/result/package.scala @@ -0,0 +1,333 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client + +import scala.util.control.NonFatal + +package object result { + + /** Represents the result of an Elasticsearch operation. + * + * @tparam T + * Value type on success + */ + sealed trait ElasticResult[+T] { + + /** Indicates whether the operation succeeded */ + def isSuccess: Boolean + + /** Indicates whether the operation failed */ + def isFailure: Boolean = !isSuccess + + /** Transforms the value on success */ + def map[U](f: T => U): ElasticResult[U] + + /** Monadic composition */ + def flatMap[U](f: T => ElasticResult[U]): ElasticResult[U] + + /** Retrieves the value or a default value */ + def getOrElse[U >: T](default: => U): U + + /** Converts to Option */ + def toOption: Option[T] + + /** Converts to Either */ + def toEither: Either[ElasticError, T] + + /** Fold pattern matching */ + def fold[U](onFailure: ElasticError => U, onSuccess: T => U): U + + /** Recovers the error if it fails */ + def error: Option[ElasticError] + + /** Performs a side effect if it succeeds */ + def foreach(f: T => Unit): Unit + + /** Filters the result */ + def filter(p: T => Boolean, errorMsg: String = "Filter predicate failed"): ElasticResult[T] + + /** Retrieves the value or throws an exception */ + def get: T + } + + /** Represents a successful operation. + */ + case class ElasticSuccess[T](value: T) extends ElasticResult[T] { + override def isSuccess: Boolean = true + + override def map[U](f: T => U): ElasticResult[U] = + try { + ElasticSuccess(f(value)) + } catch { + case NonFatal(ex) => + ElasticFailure( + ElasticError( + s"Error during map transformation: ${ex.getMessage}", + Some(ex) + ) + ) + } + + override def flatMap[U](f: T => ElasticResult[U]): ElasticResult[U] = + try { + f(value) + } catch { + case NonFatal(ex) => + ElasticFailure( + ElasticError( + s"Error during flatMap transformation: ${ex.getMessage}", + Some(ex) + ) + ) + } + + override def getOrElse[U >: T](default: => U): U = value + + override def toOption: Option[T] = Some(value) + + override def toEither: Either[ElasticError, T] = Right(value) + + override def fold[U](onFailure: ElasticError => U, onSuccess: T => U): U = onSuccess(value) + + override def error: Option[ElasticError] = None + + override def foreach(f: T => Unit): Unit = f(value) + + override def filter(p: T => Boolean, errorMsg: String): ElasticResult[T] = + if (p(value)) this + else ElasticFailure(ElasticError(errorMsg)) + + override def get: T = value + } + + /** Represents a failed operation. 
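+   *
+   * Unlike a thrown exception, a failure simply flows through `map` and `flatMap`
+   * unchanged, so pipelines short-circuit on the first error:
+   * {{{
+   * val failed: ElasticResult[Int] = ElasticFailure(ElasticError("boom"))
+   * failed.map(_ + 1).getOrElse(0) // 0; the error is kept, nothing is thrown
+   * }}}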
+ */ + case class ElasticFailure(elasticError: ElasticError) extends ElasticResult[Nothing] { + override def isSuccess: Boolean = false + + override def map[U](f: Nothing => U): ElasticResult[U] = this + + override def flatMap[U](f: Nothing => ElasticResult[U]): ElasticResult[U] = this + + override def getOrElse[U](default: => U): U = default + + override def toOption: Option[Nothing] = None + + override def toEither: Either[ElasticError, Nothing] = Left(elasticError) + + override def fold[U](onFailure: ElasticError => U, onSuccess: Nothing => U): U = + onFailure(elasticError) + + override def error: Option[ElasticError] = Some(elasticError) + + override def foreach(f: Nothing => Unit): Unit = () + + override def filter(p: Nothing => Boolean, errorMsg: String): ElasticResult[Nothing] = this + + override def get: Nothing = throw new NoSuchElementException( + s"ElasticFailure.get: ${elasticError.message}" + ) + } + + /** Represents an Elasticsearch error. + */ + case class ElasticError( + message: String, + cause: Option[Throwable] = None, + statusCode: Option[Int] = None, + index: Option[String] = None, + operation: Option[String] = None + ) { + + /** Complete message with context */ + def fullMessage: String = { + val parts = Seq( + operation.map(op => s"[$op]"), + index.map(idx => s"index=$idx"), + statusCode.map(code => s"status=$code"), + Some(message) + ).flatten + + parts.mkString(" ") + } + + /** Log the error with a logger */ + def log(logger: org.slf4j.Logger): Unit = { + cause match { + case Some(ex) => logger.error(fullMessage, ex) + case None => logger.error(fullMessage) + } + } + } + + /** Companion object with utility methods. + */ + object ElasticResult { + + /** Creates a success. + */ + def success[T](value: T): ElasticResult[T] = ElasticSuccess(value) + + /** Creates a failure. + */ + def failure[T](error: ElasticError): ElasticResult[T] = ElasticFailure(error) + + /** Creates a failure with a simple message. + */ + def failure[T](message: String): ElasticResult[T] = + ElasticFailure(ElasticError(message)) + + /** Creates a failure with a message and an exception. + */ + def failure[T](message: String, cause: Throwable): ElasticResult[T] = + ElasticFailure(ElasticError(message, Some(cause))) + + /** Runs a block of code and catches exceptions. + */ + def attempt[T](block: => T): ElasticResult[T] = + try { + ElasticSuccess(block) + } catch { + case NonFatal(ex) => + ElasticFailure( + ElasticError( + s"Operation failed: ${ex.getMessage}", + Some(ex) + ) + ) + } + + /** Converts an Option to ElasticResult. + */ + def fromOption[T](option: Option[T], errorMsg: => String): ElasticResult[T] = + option match { + case Some(value) => ElasticSuccess(value) + case None => ElasticFailure(ElasticError(errorMsg)) + } + + /** Converts an Either to ElasticResult. + */ + def fromEither[T](either: Either[String, T]): ElasticResult[T] = + either match { + case Right(value) => ElasticSuccess(value) + case Left(error) => ElasticFailure(ElasticError(error)) + } + + /** Converts a Try to ElasticResult. 
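+     *
+     * For example:
+     * {{{
+     * ElasticResult.fromTry(scala.util.Try("ok"))           // ElasticSuccess("ok")
+     * ElasticResult.fromTry(scala.util.Try(sys.error("x"))) // ElasticFailure(...)
+     * }}}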
+      */
+    def fromTry[T](tryValue: scala.util.Try[T]): ElasticResult[T] =
+      tryValue match {
+        case scala.util.Success(value) => ElasticSuccess(value)
+        case scala.util.Failure(ex) =>
+          ElasticFailure(
+            ElasticError(
+              s"Operation failed: ${ex.getMessage}",
+              Some(ex)
+            )
+          )
+      }
+
+    /** Converts a Future to a Future of ElasticResult, recovering non-fatal errors.
+      */
+    def fromFuture[T](future: scala.concurrent.Future[T])(implicit
+      ec: scala.concurrent.ExecutionContext
+    ): scala.concurrent.Future[ElasticResult[T]] = {
+      future
+        .map(value => ElasticSuccess(value))
+        .recover { case NonFatal(ex) =>
+          ElasticFailure(
+            ElasticError(
+              s"Operation failed: ${ex.getMessage}",
+              Some(ex)
+            )
+          )
+        }
+    }
+
+    /** Sequences a list of results. Returns a success with the list of values if all succeed,
+      * otherwise returns the first failure.
+      */
+    def sequence[T](results: List[ElasticResult[T]]): ElasticResult[List[T]] = {
+      results.foldRight(success(List.empty[T]): ElasticResult[List[T]]) { (result, acc) =>
+        for {
+          value <- result
+          list <- acc
+        } yield value :: list
+      }
+    }
+
+    /** Traverses a list with a function that returns an ElasticResult.
+      */
+    def traverse[T, U](list: List[T])(f: T => ElasticResult[U]): ElasticResult[List[U]] = {
+      sequence(list.map(f))
+    }
+
+    /** Implicit class adding convenience methods to `ElasticResult[Boolean]`.
+      */
+    implicit class BooleanElasticResult(result: ElasticResult[Boolean]) {
+
+      /** Checks if the result is successful AND true */
+      def isTrue: Boolean = result match {
+        case ElasticSuccess(true) => true
+        case _                    => false
+      }
+
+      /** Checks if the result is successful AND false */
+      def isFalse: Boolean = result match {
+        case ElasticSuccess(false) => true
+        case _                     => false
+      }
+
+      /** Returns true if successful, false otherwise (ignores the value) */
+      def succeeded: Boolean = result.isSuccess
+    }
+
+    /** Implicit class adding logging helpers to ElasticResult.
+      */
+    implicit class LoggableElasticResult[T](result: ElasticResult[T]) {
+
+      /** Logs the error if this is a failure */
+      def logError(logger: org.slf4j.Logger): ElasticResult[T] = {
+        result match {
+          case ElasticFailure(error) => error.log(logger)
+          case _                     => ()
+        }
+        result
+      }
+
+      /** Logs the value if this is a success */
+      def logSuccess(logger: org.slf4j.Logger, message: T => String): ElasticResult[T] = {
+        result match {
+          case ElasticSuccess(value) => logger.info(message(value))
+          case _                     => ()
+        }
+        result
+      }
+
+      /** Logs success or failure */
+      def log(
+        logger: org.slf4j.Logger,
+        onSuccess: T => String,
+        onFailure: ElasticError => String
+      ): ElasticResult[T] = {
+        result match {
+          case ElasticSuccess(value) => logger.info(onSuccess(value))
+          case ElasticFailure(error) => logger.error(onFailure(error))
+        }
+        result
+      }
+    }
+  }
+}
diff --git a/core/src/main/scala/app/softnetwork/elastic/client/scroll/package.scala b/core/src/main/scala/app/softnetwork/elastic/client/scroll/package.scala
new file mode 100644
index 00000000..36640ea0
--- /dev/null
+++ b/core/src/main/scala/app/softnetwork/elastic/client/scroll/package.scala
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2025 SOFTNETWORK
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package app.softnetwork.elastic.client
+
+package object scroll {
+
+  /** Scroll configuration
+    */
+  case class ScrollConfig(
+    keepAlive: String = "1m", // Keep-alive time for scroll context
+    scrollSize: Int = 1000, // Number of documents per batch
+    logEvery: Int = 10, // Log progress every n batches
+    maxDocuments: Option[Long] = None, // Optional maximum number of documents to retrieve
+    preferSearchAfter: Boolean = true, // Prefer search_after over scroll when possible
+    metrics: ScrollMetrics = ScrollMetrics(), // Initial scroll metrics
+    retryConfig: RetryConfig = RetryConfig() // Retry configuration
+  )
+
+  /** Scroll strategy based on query type
+    */
+  sealed trait ScrollStrategy
+  case object UsePIT
+      extends ScrollStrategy // Point In Time + search_after (ES 7.10+, best performance)
+  case object UseScroll extends ScrollStrategy // Classic scroll (supports aggregations)
+  case object UseSearchAfter
+      extends ScrollStrategy // search_after only (efficient, no server state)
+
+  /** Scroll metrics
+    */
+  case class ScrollMetrics(
+    totalDocuments: Long = 0,
+    totalBatches: Long = 0,
+    startTime: Long = System.currentTimeMillis(),
+    endTime: Option[Long] = None
+  ) {
+    def duration: Long = endTime.getOrElse(System.currentTimeMillis()) - startTime
+    // Guard against a zero duration (sub-millisecond scrolls) to avoid division by zero
+    def documentsPerSecond: Double =
+      if (duration <= 0) 0.0
+      else totalDocuments.toDouble / (duration / 1000.0)
+    def complete: ScrollMetrics = copy(endTime = Some(System.currentTimeMillis()))
+  }
+
+}
diff --git a/core/src/main/scala/app/softnetwork/elastic/client/spi/ElasticClientFactory.scala b/core/src/main/scala/app/softnetwork/elastic/client/spi/ElasticClientFactory.scala
new file mode 100644
index 00000000..4f5838a5
--- /dev/null
+++ b/core/src/main/scala/app/softnetwork/elastic/client/spi/ElasticClientFactory.scala
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2025 SOFTNETWORK
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package app.softnetwork.elastic.client.spi
+
+import app.softnetwork.elastic.client._
+import app.softnetwork.elastic.client.metrics.{MetricsCollector, MetricsElasticClient}
+import app.softnetwork.elastic.client.monitoring.MonitoredElasticClient
+import com.typesafe.config.{Config, ConfigFactory}
+import org.slf4j.{Logger, LoggerFactory}
+
+import java.util.ServiceLoader
+import java.util.concurrent.ConcurrentHashMap
+import scala.jdk.CollectionConverters._
+import scala.util.control.NonFatal
+
+/** Factory for creating Elasticsearch clients with optional metrics and monitoring.
+  *
+  * This factory uses the Service Provider Interface (SPI) pattern to load Elasticsearch client
+  * implementations and provides caching to avoid creating multiple instances for the same
+  * configuration.
+  *
+  * Thread-safe implementation using ConcurrentHashMap.
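+  *
+  * Client implementations are discovered through the standard `ServiceLoader` mechanism, i.e. a
+  * provider module lists its implementation class in
+  * `META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi`.
+  *
+  * A minimal usage sketch:
+  *
+  * {{{
+  * val client = ElasticClientFactory.create(ConfigFactory.load())
+  * }}}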
+  */
+object ElasticClientFactory {
+
+  private val logger: Logger = LoggerFactory.getLogger(getClass)
+
+  private[this] val factories: ServiceLoader[ElasticClientSpi] =
+    ServiceLoader.load(classOf[ElasticClientSpi])
+
+  // Use String key (URL) instead of Config for reliable caching
+  private[this] val clientsByUrl = new ConcurrentHashMap[String, ElasticClientApi]()
+  private[this] val metricsClientsByUrl = new ConcurrentHashMap[String, MetricsElasticClient]()
+  private[this] val monitoredClientsByUrl = new ConcurrentHashMap[String, MonitoredElasticClient]()
+
+  // Shutdown hook to close all clients
+  sys.addShutdownHook {
+    logger.info("JVM shutdown detected, closing all Elasticsearch clients")
+    shutdown()
+  }
+
+  /** Gets or creates a base Elasticsearch client for the given configuration.
+    *
+    * @param config
+    *   Typesafe configuration
+    * @return
+    *   Base Elasticsearch client
+    */
+  private[this] def getOrCreateBaseClient(config: Config): ElasticClientApi = {
+    val elasticConfig = ElasticConfig(config)
+    val url = elasticConfig.credentials.url
+
+    clientsByUrl.computeIfAbsent(
+      url,
+      _ => {
+        logger.info(s"Creating new Elasticsearch client for URL: $url")
+        // Instantiate a client from the first provider only, instead of building
+        // (and discarding) one client per SPI implementation on the classpath
+        val providers = factories.iterator().asScala
+        if (providers.hasNext) providers.next().client(config)
+        else throw new IllegalStateException("No ElasticClientSpi implementation found")
+      }
+    )
+  }
+
+  //format:off
+  /** Creates an Elasticsearch client with optional metrics and monitoring.
+    *
+    * The returned client type depends on configuration:
+    *   - If metrics.enabled = false: Base client without metrics
+    *   - If metrics.enabled = true and monitoring.enabled = false: Client with metrics
+    *   - If metrics.enabled = true and monitoring.enabled = true: Client with metrics and
+    *     monitoring
+    *
+    * @param config
+    *   Typesafe configuration
+    * @return
+    *   Configured Elasticsearch client
+    *
+    * @example
+    *   {{{
+    *   val config = ConfigFactory.load()
+    *
+    *   // Client created according to configuration
+    *   val client = ElasticClientFactory.create(config)
+    *
+    *   // Normal usage
+    *   client.createIndex("products")
+    *   client.index("products", "123", """{"name": "Product"}""")
+    *
+    *   // Access metrics if enabled
+    *   client match {
+    *     case metricsClient: MetricsElasticClient =>
+    *       val metrics = metricsClient.getMetrics
+    *       println(s"Operations: ${metrics.totalOperations}")
+    *     case _ => println("Metrics not enabled")
+    *   }
+    *   }}}
+    */
+  //format:on
+  def create(config: Config = ConfigFactory.load()): ElasticClientApi = {
+    val elasticConfig = ElasticConfig(config)
+
+    if (elasticConfig.metrics.enabled) {
+      val monitoringConfig = elasticConfig.metrics.monitoring
+      if (monitoringConfig.enabled) {
+        createWithMonitoring(config)
+      } else {
+        createWithMetrics(config)
+      }
+    } else {
+      getOrCreateBaseClient(config)
+    }
+  }
+
+  //format:off
+  /** Creates a client with explicitly enabled metrics, regardless of configuration.
+    *
+    * Uses caching: multiple calls with the same URL return the same instance.
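+    * The cache key is the Elasticsearch cluster URL resolved from the configuration
+    * (`ElasticConfig(config).credentials.url`).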
+ * + * @param config + * Typesafe configuration + * @return + * MetricsElasticClient with active metrics collection + * + * @example + * {{{ + * val config = ConfigFactory.load() + * val client = ElasticClientFactory.createWithMetrics(config) + * + * // Operations + * client.createIndex("logs") + * client.index("logs", "1", """{"message": "test"}""") + * + * // Metrics always available + * val metrics = client.getMetrics + * println(s"Total ops: ${metrics.totalOperations}") + * + * // Metrics by operation + * client.getMetricsByOperation("index").foreach { m => + * println(s"Index ops: ${m.totalOperations}") + * println(s"Avg duration: ${m.averageDuration}ms") + * } + * }}} + */ + //format:on + def createWithMetrics(config: Config = ConfigFactory.load()): MetricsElasticClient = { + val elasticConfig = ElasticConfig(config) + val url = elasticConfig.credentials.url + + metricsClientsByUrl.computeIfAbsent( + url, + _ => { + logger.info(s"Creating new MetricsElasticClient for URL: $url") + val baseClient = getOrCreateBaseClient(config) + val metricsCollector = new MetricsCollector() + new MetricsElasticClient(baseClient, metricsCollector) + } + ) + } + + //format:off + /** Creates a client with a custom metrics collector. + * + * Useful for sharing a collector between multiple clients or for testing. Does NOT use caching - + * always creates a new client instance. + * + * @param config + * Typesafe configuration + * @param metricsCollector + * Custom metrics collector + * @return + * MetricsElasticClient using the provided collector + * + * @example + * {{{ + * val config = ConfigFactory.load() + * val sharedCollector = new MetricsCollector() + * + * // Multiple clients sharing the same collector + * val client1 = ElasticClientFactory.createWithCustomMetrics(config, sharedCollector) + * val client2 = ElasticClientFactory.createWithCustomMetrics(config, sharedCollector) + * + * // Operations on both clients + * client1.createIndex("index1") + * client2.createIndex("index2") + * + * // Global metrics for both clients + * val metrics = sharedCollector.getMetrics + * println(s"Total ops across all clients: ${metrics.totalOperations}") + * }}} + */ + //format:on + def createWithCustomMetrics( + config: Config = ConfigFactory.load(), + metricsCollector: MetricsCollector + ): MetricsElasticClient = { + logger.info("Creating new MetricsElasticClient with custom collector") + val baseClient = getOrCreateBaseClient(config) + new MetricsElasticClient(baseClient, metricsCollector) + } + + //format:off + /** Creates a client with automatic monitoring and alerting. + * + * Monitoring generates periodic reports and triggers alerts when configured thresholds are + * exceeded. Uses caching: multiple calls with the same URL return the same instance. 
+ * + * @param config + * Typesafe configuration + * @return + * MonitoredElasticClient with active monitoring + * + * @example + * {{{ + * val config = ConfigFactory.load() + * + * val client = ElasticClientFactory.createWithMonitoring(config) + * + * // Monitoring starts automatically + * // Logs every 30s (according to config): + * // === Elasticsearch Metrics === + * // Total Operations: 150 + * // Success Rate: 98.5% + * // Average Duration: 45ms + * // ============================= + * + * // Normal operations + * client.createIndex("monitored-index") + * + * // If failure rate > threshold, automatic alert: + * // ⚠️ HIGH FAILURE RATE: 15.0% + * + * // Stop monitoring gracefully + * client.shutdown() + * }}} + */ + //format:on + def createWithMonitoring(config: Config = ConfigFactory.load()): MonitoredElasticClient = { + val elasticConfig = ElasticConfig(config) + val url = elasticConfig.credentials.url + + monitoredClientsByUrl.computeIfAbsent( + url, + _ => { + logger.info(s"Creating new MonitoredElasticClient for URL: $url") + val baseClient = getOrCreateBaseClient(config) + val metricsCollector = new MetricsCollector() + val monitoringConfig = elasticConfig.metrics.monitoring + new MonitoredElasticClient(baseClient, metricsCollector, monitoringConfig) + } + ) + } + + /** Creates a monitored client with a custom metrics collector. + * + * Does NOT use caching - always creates a new client instance. + * + * @param config + * Typesafe configuration + * @param metricsCollector + * Custom metrics collector + * @return + * MonitoredElasticClient using the provided collector + */ + def createMonitoredWithCustomMetrics( + config: Config = ConfigFactory.load(), + metricsCollector: MetricsCollector + ): MonitoredElasticClient = { + logger.info("Creating new MonitoredElasticClient with custom collector") + val elasticConfig = ElasticConfig(config) + val baseClient = getOrCreateBaseClient(config) + val monitoringConfig = elasticConfig.metrics.monitoring + new MonitoredElasticClient(baseClient, metricsCollector, monitoringConfig) + } + + /** Shuts down all cached clients. + * + * This method should be called when the application terminates. It's automatically called via a + * JVM shutdown hook. + */ + def shutdown(): Unit = { + logger.info("Shutting down all Elasticsearch clients") + + // Shutdown monitored clients first (they have schedulers) + monitoredClientsByUrl.values().asScala.foreach { client => + try { + client.shutdown() + } catch { + case NonFatal(ex) => + logger.error(s"Error shutting down monitored client: ${ex.getMessage}", ex) + } + } + + // Clear caches + monitoredClientsByUrl.clear() + metricsClientsByUrl.clear() + + // Shutdown base clients + clientsByUrl.values().asScala.foreach { client => + try { + client.close() + } catch { + case NonFatal(ex) => + logger.error(s"Error shutting down base client: ${ex.getMessage}", ex) + } + } + + clientsByUrl.clear() + + logger.info("All Elasticsearch clients shut down") + } + + /** Clears all caches without shutting down clients. + * + * Useful for testing. Use with caution in production. + */ + def clearCache(): Unit = { + logger.warn("Clearing Elasticsearch client cache") + clientsByUrl.clear() + metricsClientsByUrl.clear() + monitoredClientsByUrl.clear() + } + + /** Gets statistics about cached clients. 
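+    *
+    * A minimal illustrative sketch:
+    *
+    * {{{
+    * ElasticClientFactory.getCacheStats.foreach { case (kind, count) =>
+    *   println(s"$kind: $count")
+    * }
+    * }}}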
+ * + * @return + * Map with cache statistics + */ + def getCacheStats: Map[String, Int] = { + Map( + "baseClients" -> clientsByUrl.size(), + "metricsClients" -> metricsClientsByUrl.size(), + "monitoredClients" -> monitoredClientsByUrl.size() + ) + } +} diff --git a/core/src/main/scala/app/softnetwork/elastic/client/spi/ElasticClientSpi.scala b/core/src/main/scala/app/softnetwork/elastic/client/spi/ElasticClientSpi.scala new file mode 100644 index 00000000..936c13ed --- /dev/null +++ b/core/src/main/scala/app/softnetwork/elastic/client/spi/ElasticClientSpi.scala @@ -0,0 +1,45 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.spi + +import app.softnetwork.elastic.client.ElasticClientApi +import com.typesafe.config.Config + +/** Service Provider Interface for creating Elasticsearch client instances. + */ +trait ElasticClientSpi { + + //format:off + /** Creates an Elasticsearch client instance. + * + * @param conf + * Typesafe configuration containing Elasticsearch parameters + * @return + * Configured ElasticClientApi instance + * + * @example + * {{{ + * class MyElasticClientProvider extends ElasticClientSpi { + * override def client(config: Config): ElasticClientApi = { + * new MyElasticClientImpl(config) + * } + * } + * }}} + */ + //format:on + def client(conf: Config): ElasticClientApi +} diff --git a/core/src/test/scala/app/softnetwork/elastic/client/AliasApiSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/AliasApiSpec.scala new file mode 100644 index 00000000..5ea8bb8f --- /dev/null +++ b/core/src/test/scala/app/softnetwork/elastic/client/AliasApiSpec.scala @@ -0,0 +1,2174 @@ +package app.softnetwork.elastic.client + +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.BeforeAndAfterEach +import org.mockito.MockitoSugar +import org.mockito.ArgumentMatchersSugar +import org.slf4j.Logger +import app.softnetwork.elastic.client.result._ + +/** Unit tests for AliasApi + */ +class AliasApiSpec + extends AnyWordSpec + with Matchers + with BeforeAndAfterEach + with MockitoSugar + with ArgumentMatchersSugar { + + // Mock logger + val mockLogger: Logger = mock[Logger] + + // Concrete implementation for testing + class TestAliasApi extends AliasApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + // Control variables + var executeAddAliasResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeRemoveAliasResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeAliasExistsResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeGetAliasesResult: ElasticResult[String] = ElasticSuccess( + """{"my-index":{"aliases":{"my-alias":{}}}}""" + ) + var executeSwapAliasResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeIndexExistsResult: ElasticResult[Boolean] = ElasticSuccess(true) + + override private[client] def executeAddAlias( + index: String, + alias: 
String + ): ElasticResult[Boolean] = { + executeAddAliasResult + } + + override private[client] def executeRemoveAlias( + index: String, + alias: String + ): ElasticResult[Boolean] = { + executeRemoveAliasResult + } + + override private[client] def executeAliasExists(alias: String): ElasticResult[Boolean] = { + executeAliasExistsResult + } + + override private[client] def executeGetAliases(index: String): ElasticResult[String] = { + executeGetAliasesResult + } + + override private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): ElasticResult[Boolean] = { + executeSwapAliasResult + } + + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + executeIndexExistsResult + } + + // Other required methods + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? + } + + var aliasApi: TestAliasApi = _ + + override def beforeEach(): Unit = { + super.beforeEach() + aliasApi = new TestAliasApi() + reset(mockLogger) + } + + "AliasApi" should { + + "addAlias" should { + + "successfully add an alias to an index" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + + verify(mockLogger).debug("Adding alias 'my-alias' to index 'my-index'") + verify(mockLogger).info("✅ Alias 'my-alias' successfully added to index 'my-index'") + } + + "reject invalid index name" in { + // When + val result = aliasApi.addAlias("INVALID", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.message should include("lowercase") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("addAlias") + + verify(mockLogger, never).debug(any[String]) + } + + "reject empty index name" in { + // When + val result = aliasApi.addAlias("", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.message should include("cannot be empty") + } + + "reject null index name" in { + // When + val result = aliasApi.addAlias(null, "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + } + + "reject invalid alias name" in { + // When + val result = aliasApi.addAlias("my-index", "INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid alias") + result.error.get.message should include("lowercase") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("addAlias") + } + + "reject empty alias name" in { + // When + val result = aliasApi.addAlias("my-index", "") + + // Then + result.isFailure shouldBe true + result.error.get.message 
should include("Invalid alias") + result.error.get.message should include("cannot be empty") + } + + "reject when index and alias have the same name" in { + // When + val result = aliasApi.addAlias("my-index", "my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Index and alias cannot have the same name: 'my-index'" + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("addAlias") + } + + "fail when index does not exist" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(false) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Index 'my-index' does not exist" + result.error.get.statusCode shouldBe Some(404) + result.error.get.operation shouldBe Some("addAlias") + + verify(mockLogger, never).debug(contains("Adding alias")) + } + + "fail when indexExists check fails" in { + // Given + val error = ElasticError("Connection timeout", statusCode = Some(504)) + aliasApi.executeIndexExistsResult = ElasticFailure(error) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Connection timeout" + result.error.get.statusCode shouldBe Some(504) + + verify(mockLogger, never).debug(contains("Adding alias")) + } + + "fail when executeAddAlias fails" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + val error = ElasticError("Alias already exists", statusCode = Some(400)) + aliasApi.executeAddAliasResult = ElasticFailure(error) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Alias already exists" + + verify(mockLogger).debug("Adding alias 'my-alias' to index 'my-index'") + verify(mockLogger).error( + "❌ Failed to add alias 'my-alias' to index 'my-index': Alias already exists" + ) + } + + "validate in correct order: index, alias, same name, existence" in { + // Given - Invalid index should fail first + val result1 = aliasApi.addAlias("INVALID", "INVALID-ALIAS") + result1.error.get.message should include("Invalid index") + + // Given - Invalid alias should fail after index validation + val result2 = aliasApi.addAlias("my-index", "INVALID") + result2.error.get.message should include("Invalid alias") + + // Given - Same name check should fail after name validation + val result3 = aliasApi.addAlias("my-index", "my-index") + result3.error.get.message should include("same name") + + // Given - Existence check should fail last + aliasApi.executeIndexExistsResult = ElasticSuccess(false) + val result4 = aliasApi.addAlias("my-index", "my-alias") + result4.error.get.message should include("does not exist") + } + + "handle special characters in valid names" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index-2024", "my-alias_v1.0") + + // Then + result.isSuccess shouldBe true + } + + "reject alias names with forbidden characters" in { + val forbiddenChars = List("\\", "/", "*", "?", "\"", "<", ">", "|", " ", ",", "#") + + forbiddenChars.foreach { char => + val result = aliasApi.addAlias("my-index", s"alias${char}name") + result.isFailure shouldBe true + } + } + + "reject alias starting with forbidden characters" in { + val result1 = aliasApi.addAlias("my-index", "-alias") 
+ result1.isFailure shouldBe true + + val result2 = aliasApi.addAlias("my-index", "_alias") + result2.isFailure shouldBe true + + val result3 = aliasApi.addAlias("my-index", "+alias") + result3.isFailure shouldBe true + } + + "reject alias named '.' or '..'" in { + val result1 = aliasApi.addAlias("my-index", ".") + result1.isFailure shouldBe true + + val result2 = aliasApi.addAlias("my-index", "..") + result2.isFailure shouldBe true + } + + "handle alias name with maximum length" in { + // Given + val maxAlias = "a" * 255 + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", maxAlias) + + // Then + result.isSuccess shouldBe true + } + + "reject alias name exceeding maximum length" in { + // Given + val tooLongAlias = "a" * 256 + + // When + val result = aliasApi.addAlias("my-index", tooLongAlias) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid alias") + } + } + + "removeAlias" should { + + "successfully remove an alias from an index" in { + // Given + aliasApi.executeRemoveAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.removeAlias("my-index", "my-alias") + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + + verify(mockLogger).debug("Removing alias 'my-alias' from index 'my-index'") + verify(mockLogger).info("✅ Alias 'my-alias' successfully removed from index 'my-index'") + } + + "reject invalid index name" in { + // When + val result = aliasApi.removeAlias("INVALID", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("removeAlias") + + verify(mockLogger, never).debug(any[String]) + } + + "reject invalid alias name" in { + // When + val result = aliasApi.removeAlias("my-index", "INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid alias") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("removeAlias") + } + + "fail when alias does not exist (404)" in { + // Given + val error = ElasticError("Alias not found", statusCode = Some(404)) + aliasApi.executeRemoveAliasResult = ElasticFailure(error) + + // When + val result = aliasApi.removeAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(404) + + verify(mockLogger).debug("Removing alias 'my-alias' from index 'my-index'") + verify(mockLogger).error( + "❌ Failed to remove alias 'my-alias' from index 'my-index': Alias not found" + ) + } + + "fail when executeRemoveAlias fails" in { + // Given + val error = ElasticError("Server error", statusCode = Some(500)) + aliasApi.executeRemoveAliasResult = ElasticFailure(error) + + // When + val result = aliasApi.removeAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Server error" + + verify(mockLogger).error(contains("Failed to remove alias")) + } + + "validate index name before alias name" in { + // When + val result = aliasApi.removeAlias("INVALID", "INVALID-ALIAS") + + // Then + result.error.get.message should include("Invalid index") + result.error.get.message should not include "Invalid alias" + } + + "handle empty index name" in { + // When + val result = aliasApi.removeAlias("", "my-alias") + + // Then + 
result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + } + + "handle empty alias name" in { + // When + val result = aliasApi.removeAlias("my-index", "") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid alias") + } + } + + "aliasExists" should { + + "return true when alias exists" in { + // Given + aliasApi.executeAliasExistsResult = ElasticSuccess(true) + + // When + val result = aliasApi.aliasExists("my-alias") + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + + verify(mockLogger).debug("Checking if alias 'my-alias' exists") + verify(mockLogger).info("✅ Alias 'my-alias' exists") + } + + "return false when alias does not exist" in { + // Given + aliasApi.executeAliasExistsResult = ElasticSuccess(false) + + // When + val result = aliasApi.aliasExists("my-alias") + + // Then + result.isSuccess shouldBe true + result.get shouldBe false + + verify(mockLogger).debug("Checking if alias 'my-alias' exists") + verify(mockLogger).info("✅ Alias 'my-alias' does not exist") + } + + "reject invalid alias name" in { + // When + val result = aliasApi.aliasExists("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid alias name") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("aliasExists") + + verify(mockLogger, never).debug(any[String]) + } + + "reject empty alias name" in { + // When + val result = aliasApi.aliasExists("") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid alias name") + } + + "fail when executeAliasExists fails" in { + // Given + val error = ElasticError("Connection timeout", statusCode = Some(504)) + aliasApi.executeAliasExistsResult = ElasticFailure(error) + + // When + val result = aliasApi.aliasExists("my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Connection timeout" + + verify(mockLogger).debug("Checking if alias 'my-alias' exists") + verify(mockLogger).error( + "❌ Failed to check existence of alias 'my-alias': Connection timeout" + ) + } + + "handle network errors" in { + // Given + val error = ElasticError( + "Network error", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + aliasApi.executeAliasExistsResult = ElasticFailure(error) + + // When + val result = aliasApi.aliasExists("my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + } + } + + "getAliases" should { + + "successfully retrieve aliases from an index" in { + // Given + val jsonResponse = """{"my-index":{"aliases":{"alias1":{},"alias2":{}}}}""" + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonResponse) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set("alias1", "alias2") + + verify(mockLogger).debug("Getting aliases for index 'my-index'") + verify(mockLogger).debug("Found 2 alias(es) for index 'my-index': alias1, alias2") + verify(mockLogger).info("✅ Found 2 alias(es) for index 'my-index': alias1, alias2") + } + + "return empty set when index has no aliases" in { + // Given + val jsonResponse = """{"my-index":{"aliases":{}}}""" + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonResponse) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set.empty + + verify(mockLogger).debug("No 
aliases found for index 'my-index'") + verify(mockLogger).info("✅ No aliases found for index 'my-index'") + } + + "return empty set when aliases object is null" in { + // Given + val jsonResponse = """{"my-index":{"mappings":{}}}""" + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonResponse) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set.empty + } + + "return empty set when index not found in response" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("{}") + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set.empty + + verify(mockLogger).warn("Index 'my-index' not found in response") + } + + "reject invalid index name" in { + // When + val result = aliasApi.getAliases("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index name") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("getAliases") + + verify(mockLogger, never).debug(any[String]) + } + + "fail when executeGetAliases fails" in { + // Given + val error = ElasticError("Index not found", statusCode = Some(404)) + aliasApi.executeGetAliasesResult = ElasticFailure(error) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Index not found" + + verify(mockLogger).debug("Getting aliases for index 'my-index'") + verify(mockLogger).error("❌ Failed to get aliases for index 'my-index': Index not found") + } + + "fail when JSON parsing fails" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("invalid json {") + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isFailure shouldBe true + + verify(mockLogger).error(contains("Failed to parse aliases JSON")) + } + + "handle complex JSON structure" in { + // Given + val complexJson = + """{ + | "my-index": { + | "aliases": { + | "alias1": {"filter": {"term": {"user": "kimchy"}}}, + | "alias2": {"routing": "1"}, + | "alias3": {} + | } + | } + |}""".stripMargin + aliasApi.executeGetAliasesResult = ElasticSuccess(complexJson) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set("alias1", "alias2", "alias3") + } + + "handle single alias" in { + // Given + val jsonResponse = """{"my-index":{"aliases":{"single-alias":{}}}}""" + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonResponse) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set("single-alias") + + verify(mockLogger).info(contains("Found 1 alias(es)")) + } + + "handle whitespace in JSON" in { + // Given + val jsonWithWhitespace = + """{ + | "my-index" : { + | "aliases" : { + | "alias1" : {} + | } + | } + |}""".stripMargin + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonWithWhitespace) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set("alias1") + } + } + + "swapAlias" should { + + "successfully swap alias between two indexes" in { + // Given + aliasApi.executeSwapAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.swapAlias("old-index", "new-index", "my-alias") + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + + verify(mockLogger).info( + "Swapping 
alias 'my-alias' from 'old-index' to 'new-index' (atomic operation)" + ) + verify(mockLogger).info( + "✅ Alias 'my-alias' successfully swapped from 'old-index' to 'new-index'" + ) + } + + "reject invalid old index name" in { + // When + val result = aliasApi.swapAlias("INVALID", "new-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid old index name") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("swapAlias") + + verify(mockLogger, never).info(contains("Swapping")) + } + + "reject invalid new index name" in { + // When + val result = aliasApi.swapAlias("old-index", "INVALID", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid new index name") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("swapAlias") + } + + "reject invalid alias name" in { + // When + val result = aliasApi.swapAlias("old-index", "new-index", "INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid alias name") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("swapAlias") + } + + "reject when old and new index are the same" in { + // When + val result = aliasApi.swapAlias("my-index", "my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Old and new index cannot be the same: 'my-index'" + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("swapAlias") + } + + "fail when executeSwapAlias fails" in { + // Given + val error = ElasticError("Old index not found", statusCode = Some(404)) + aliasApi.executeSwapAliasResult = ElasticFailure(error) + + // When + val result = aliasApi.swapAlias("old-index", "new-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Old index not found" + + verify(mockLogger).info( + "Swapping alias 'my-alias' from 'old-index' to 'new-index' (atomic operation)" + ) + verify(mockLogger).error( + "❌ Failed to swap alias 'my-alias' from 'old-index' to 'new-index': Old index not found" + ) + } + + "validate in correct order: oldIndex, newIndex, alias, same index" in { + // Given - Invalid old index should fail first + val result1 = aliasApi.swapAlias("INVALID", "new-index", "my-alias") + result1.error.get.message should include("Invalid old index") + + // Given - Invalid new index should fail after old index + val result2 = aliasApi.swapAlias("old-index", "INVALID", "my-alias") + result2.error.get.message should include("Invalid new index") + + // Given - Invalid alias should fail after both indexes + val result3 = aliasApi.swapAlias("old-index", "new-index", "INVALID") + result3.error.get.message should include("Invalid alias") + + // Given - Same index check should fail last + val result4 = aliasApi.swapAlias("my-index", "my-index", "my-alias") + result4.error.get.message should include("same") + } + + "handle empty index names" in { + val result1 = aliasApi.swapAlias("", "new-index", "my-alias") + result1.isFailure shouldBe true + + val result2 = aliasApi.swapAlias("old-index", "", "my-alias") + result2.isFailure shouldBe true + } + + "handle empty alias name" in { + // When + val result = aliasApi.swapAlias("old-index", "new-index", "") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid alias") + } + + "handle network 
timeout" in { + // Given + val error = ElasticError( + "Request timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + aliasApi.executeSwapAliasResult = ElasticFailure(error) + + // When + val result = aliasApi.swapAlias("old-index", "new-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + } + } + + "workflow scenarios" should { + + "successfully add, check, and remove alias" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeAliasExistsResult = ElasticSuccess(true) + aliasApi.executeRemoveAliasResult = ElasticSuccess(true) + + // When + val addResult = aliasApi.addAlias("my-index", "my-alias") + val existsResult = aliasApi.aliasExists("my-alias") + val removeResult = aliasApi.removeAlias("my-index", "my-alias") + + // Then + addResult.isSuccess shouldBe true + existsResult.isSuccess shouldBe true + existsResult.get shouldBe true + removeResult.isSuccess shouldBe true + } + + "successfully perform zero-downtime deployment with swapAlias" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeSwapAliasResult = ElasticSuccess(true) + + // When - Initial setup + val addResult = aliasApi.addAlias("products-v1", "products") + + // When - Deploy + val swapResult = aliasApi.swapAlias("products-v1", "products-v2", "products") + + // Then + addResult.isSuccess shouldBe true + swapResult.isSuccess shouldBe true + + verify(mockLogger).info(contains("atomic operation")) + } + + "successfully add multiple aliases to same index" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result1 = aliasApi.addAlias("my-index", "alias1") + val result2 = aliasApi.addAlias("my-index", "alias2") + val result3 = aliasApi.addAlias("my-index", "alias3") + + // Then + result1.isSuccess shouldBe true + result2.isSuccess shouldBe true + result3.isSuccess shouldBe true + } + + "successfully add same alias to multiple indexes" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result1 = aliasApi.addAlias("index1", "shared-alias") + val result2 = aliasApi.addAlias("index2", "shared-alias") + val result3 = aliasApi.addAlias("index3", "shared-alias") + + // Then + result1.isSuccess shouldBe true + result2.isSuccess shouldBe true + result3.isSuccess shouldBe true + } + + "handle partial failure in workflow" in { + // Given - First operation succeeds + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + val result1 = aliasApi.addAlias("my-index", "alias1") + + // Given - Second operation fails + aliasApi.executeAddAliasResult = ElasticFailure(ElasticError("Conflict")) + val result2 = aliasApi.addAlias("my-index", "alias2") + + // Then + result1.isSuccess shouldBe true + result2.isFailure shouldBe true + } + + "retrieve aliases after adding them" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeGetAliasesResult = ElasticSuccess( + """{"my-index":{"aliases":{"alias1":{},"alias2":{}}}}""" + ) + + // When + val add1 = aliasApi.addAlias("my-index", "alias1") + val add2 = 
aliasApi.addAlias("my-index", "alias2") + val getResult = aliasApi.getAliases("my-index") + + // Then + add1.isSuccess shouldBe true + add2.isSuccess shouldBe true + getResult.isSuccess shouldBe true + getResult.get should contain allOf ("alias1", "alias2") + } + + "verify alias existence before and after removal" in { + // Given + aliasApi.executeAliasExistsResult = ElasticSuccess(true) + aliasApi.executeRemoveAliasResult = ElasticSuccess(true) + + // When - Check exists before removal + val existsBefore = aliasApi.aliasExists("my-alias") + + // When - Remove + val removeResult = aliasApi.removeAlias("my-index", "my-alias") + + // When - Check exists after removal + aliasApi.executeAliasExistsResult = ElasticSuccess(false) + val existsAfter = aliasApi.aliasExists("my-alias") + + // Then + existsBefore.get shouldBe true + removeResult.isSuccess shouldBe true + existsAfter.get shouldBe false + } + } + + "ElasticResult integration" should { + + "work with map transformation" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess( + """{"my-index":{"aliases":{"alias1":{},"alias2":{}}}}""" + ) + + // When + val result = aliasApi.getAliases("my-index").map(_.size) + + // Then + result.isSuccess shouldBe true + result.get shouldBe 2 + } + + "work with flatMap composition" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeAliasExistsResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", "my-alias").flatMap { _ => + aliasApi.aliasExists("my-alias") + } + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + } + + "work with for-comprehension" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeGetAliasesResult = ElasticSuccess( + """{"my-index":{"aliases":{"my-alias":{}}}}""" + ) + + // When + val result = for { + _ <- aliasApi.addAlias("my-index", "my-alias") + aliases <- aliasApi.getAliases("my-index") + } yield aliases.contains("my-alias") + + // Then + result shouldBe ElasticSuccess(true) + } + + "propagate errors through transformations" in { + // Given + val error = ElasticError("Failed") + aliasApi.executeGetAliasesResult = ElasticFailure(error) + + // When + val result = aliasApi + .getAliases("my-index") + .map(_.size) + .flatMap(size => ElasticSuccess(size * 2)) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Failed" + } + + "handle chained operations with mixed results" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeAliasExistsResult = ElasticFailure(ElasticError("Check failed")) + + // When + val result = for { + _ <- aliasApi.addAlias("my-index", "my-alias") + exists <- aliasApi.aliasExists("my-alias") + } yield exists + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Check failed" + } + } + + "edge cases" should { + + "handle alias with maximum valid length" in { + // Given + val maxAlias = "a" * 255 + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", maxAlias) + + // Then + result.isSuccess shouldBe true + } + + "handle index with maximum valid length" in { + // Given + val maxIndex = "a" * 255 + aliasApi.executeIndexExistsResult = 
ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias(maxIndex, "my-alias") + + // Then + result.isSuccess shouldBe true + } + + "handle alias names with hyphens and underscores" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", "my-alias_v1.0-prod") + + // Then + result.isSuccess shouldBe true + } + + "handle alias names with dots" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", "my.alias.name") + + // Then + result.isSuccess shouldBe true + } + + "handle very long error messages" in { + // Given + val longMessage = "Error: " + ("x" * 1000) + val error = ElasticError(longMessage, statusCode = Some(500)) + aliasApi.executeAddAliasResult = ElasticFailure(error) + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Error:") + } + + "handle null response from executeGetAliases" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess(null) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isFailure shouldBe true + } + + "handle malformed JSON with missing fields" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("""{"my-index":{}}""") + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set.empty + } + + "handle JSON with unexpected structure" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("""{"unexpected":"structure"}""") + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set.empty + } + + "handle multiple consecutive swapAlias calls" in { + // Given + aliasApi.executeSwapAliasResult = ElasticSuccess(true) + + // When + val result1 = aliasApi.swapAlias("v1", "v2", "current") + val result2 = aliasApi.swapAlias("v2", "v3", "current") + val result3 = aliasApi.swapAlias("v3", "v4", "current") + + // Then + result1.isSuccess shouldBe true + result2.isSuccess shouldBe true + result3.isSuccess shouldBe true + + verify(mockLogger, times(3)).info(contains("atomic operation")) + } + + "handle concurrent alias operations" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeAliasExistsResult = ElasticSuccess(true) + + // When + val results = (1 to 5).map(i => aliasApi.addAlias(s"index-$i", s"alias-$i")) + + // Then + results.foreach(_.isSuccess shouldBe true) + } + } + + "error handling" should { + + "handle authentication error in addAlias" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + val error = ElasticError("Authentication failed", statusCode = Some(401)) + aliasApi.executeAddAliasResult = ElasticFailure(error) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(401) + } + + "handle authorization error in removeAlias" in { + // Given + val error = ElasticError("Insufficient permissions", statusCode = Some(403)) + aliasApi.executeRemoveAliasResult = 
ElasticFailure(error) + + // When + val result = aliasApi.removeAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(403) + } + + "handle timeout error in aliasExists" in { + // Given + val error = ElasticError( + "Request timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + aliasApi.executeAliasExistsResult = ElasticFailure(error) + + // When + val result = aliasApi.aliasExists("my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + result.error.get.cause.get shouldBe a[java.net.SocketTimeoutException] + } + + "handle server error in getAliases" in { + // Given + val error = ElasticError("Internal server error", statusCode = Some(500)) + aliasApi.executeGetAliasesResult = ElasticFailure(error) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(500) + } + + "handle conflict error in swapAlias" in { + // Given + val error = ElasticError("Version conflict", statusCode = Some(409)) + aliasApi.executeSwapAliasResult = ElasticFailure(error) + + // When + val result = aliasApi.swapAlias("old-index", "new-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(409) + } + + "preserve error context through operations" in { + // Given + val error = ElasticError( + message = "Operation failed", + cause = Some(new RuntimeException("Root cause")), + statusCode = Some(500), + index = Some("my-index"), + operation = Some("internal") + ) + aliasApi.executeAddAliasResult = ElasticFailure(error) + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Operation failed" + result.error.get.cause shouldBe defined + result.error.get.statusCode shouldBe Some(500) + } + + "handle all validation errors for addAlias" in { + // Invalid index + val result1 = aliasApi.addAlias("INVALID", "my-alias") + result1.isFailure shouldBe true + + // Invalid alias + val result2 = aliasApi.addAlias("my-index", "INVALID") + result2.isFailure shouldBe true + + // Same name + val result3 = aliasApi.addAlias("my-index", "my-index") + result3.isFailure shouldBe true + + // Index doesn't exist + aliasApi.executeIndexExistsResult = ElasticSuccess(false) + val result4 = aliasApi.addAlias("my-index", "my-alias") + result4.isFailure shouldBe true + } + + "handle all validation errors for swapAlias" in { + // Invalid old index + val result1 = aliasApi.swapAlias("INVALID", "new-index", "my-alias") + result1.isFailure shouldBe true + + // Invalid new index + val result2 = aliasApi.swapAlias("old-index", "INVALID", "my-alias") + result2.isFailure shouldBe true + + // Invalid alias + val result3 = aliasApi.swapAlias("old-index", "new-index", "INVALID") + result3.isFailure shouldBe true + + // Same indexes + val result4 = aliasApi.swapAlias("my-index", "my-index", "my-alias") + result4.isFailure shouldBe true + } + + "handle network errors gracefully" in { + // Given + val error = ElasticError( + "Connection refused", + cause = Some(new java.net.ConnectException()), + statusCode = Some(503) + ) + aliasApi.executeIndexExistsResult = ElasticFailure(error) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.isFailure shouldBe true + result.error.get.cause 
shouldBe defined + } + + "handle JSON parsing errors in getAliases" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("not valid json at all") + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isFailure shouldBe true + verify(mockLogger).error(contains("Failed to parse aliases JSON")) + } + } + + "logging behavior" should { + + "log debug message for addAlias" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + aliasApi.addAlias("my-index", "my-alias") + + // Then + verify(mockLogger).debug("Adding alias 'my-alias' to index 'my-index'") + } + + "log info message with emoji for successful addAlias" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + aliasApi.addAlias("my-index", "my-alias") + + // Then + verify(mockLogger).info("✅ Alias 'my-alias' successfully added to index 'my-index'") + } + + "log error message with emoji for failed addAlias" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticFailure(ElasticError("Failed")) + + // When + aliasApi.addAlias("my-index", "my-alias") + + // Then + verify(mockLogger).error("❌ Failed to add alias 'my-alias' to index 'my-index': Failed") + } + + "log debug message for removeAlias" in { + // Given + aliasApi.executeRemoveAliasResult = ElasticSuccess(true) + + // When + aliasApi.removeAlias("my-index", "my-alias") + + // Then + verify(mockLogger).debug("Removing alias 'my-alias' from index 'my-index'") + } + + "log info message with emoji for successful removeAlias" in { + // Given + aliasApi.executeRemoveAliasResult = ElasticSuccess(true) + + // When + aliasApi.removeAlias("my-index", "my-alias") + + // Then + verify(mockLogger).info("✅ Alias 'my-alias' successfully removed from index 'my-index'") + } + + "log error message with emoji for failed removeAlias" in { + // Given + aliasApi.executeRemoveAliasResult = ElasticFailure(ElasticError("Not found")) + + // When + aliasApi.removeAlias("my-index", "my-alias") + + // Then + verify(mockLogger).error( + "❌ Failed to remove alias 'my-alias' from index 'my-index': Not found" + ) + } + + "log debug message for aliasExists" in { + // Given + aliasApi.executeAliasExistsResult = ElasticSuccess(true) + + // When + aliasApi.aliasExists("my-alias") + + // Then + verify(mockLogger).debug("Checking if alias 'my-alias' exists") + } + + "log info message when alias exists" in { + // Given + aliasApi.executeAliasExistsResult = ElasticSuccess(true) + + // When + aliasApi.aliasExists("my-alias") + + // Then + verify(mockLogger).info("✅ Alias 'my-alias' exists") + } + + "log info message when alias does not exist" in { + // Given + aliasApi.executeAliasExistsResult = ElasticSuccess(false) + + // When + aliasApi.aliasExists("my-alias") + + // Then + verify(mockLogger).info("✅ Alias 'my-alias' does not exist") + } + + "log error message with emoji for failed aliasExists" in { + // Given + aliasApi.executeAliasExistsResult = ElasticFailure(ElasticError("Failed")) + + // When + aliasApi.aliasExists("my-alias") + + // Then + verify(mockLogger).error("❌ Failed to check existence of alias 'my-alias': Failed") + } + + "log debug message for getAliases" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("""{"my-index":{"aliases":{}}}""") + + // When + aliasApi.getAliases("my-index") + + // Then + 
verify(mockLogger).debug("Getting aliases for index 'my-index'") + } + + "log debug and info messages for found aliases" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess( + """{"my-index":{"aliases":{"alias1":{},"alias2":{}}}}""" + ) + + // When + aliasApi.getAliases("my-index") + + // Then + verify(mockLogger).debug(contains("Found 2 alias(es)")) + verify(mockLogger).info(contains("✅ Found 2 alias(es)")) + } + + "log info message when no aliases found" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("""{"my-index":{"aliases":{}}}""") + + // When + aliasApi.getAliases("my-index") + + // Then + verify(mockLogger).info("✅ No aliases found for index 'my-index'") + } + + "log warn message when index not found in response" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("{}") + + // When + aliasApi.getAliases("my-index") + + // Then + verify(mockLogger).warn("Index 'my-index' not found in response") + } + + "log error message with emoji for failed getAliases" in { + // Given + aliasApi.executeGetAliasesResult = ElasticFailure(ElasticError("Failed")) + + // When + aliasApi.getAliases("my-index") + + // Then + verify(mockLogger).error("❌ Failed to get aliases for index 'my-index': Failed") + } + + "log info message for swapAlias start" in { + // Given + aliasApi.executeSwapAliasResult = ElasticSuccess(true) + + // When + aliasApi.swapAlias("old-index", "new-index", "my-alias") + + // Then + verify(mockLogger).info( + "Swapping alias 'my-alias' from 'old-index' to 'new-index' (atomic operation)" + ) + } + + "log info message with emoji for successful swapAlias" in { + // Given + aliasApi.executeSwapAliasResult = ElasticSuccess(true) + + // When + aliasApi.swapAlias("old-index", "new-index", "my-alias") + + // Then + verify(mockLogger).info( + "✅ Alias 'my-alias' successfully swapped from 'old-index' to 'new-index'" + ) + } + + "log error message with emoji for failed swapAlias" in { + // Given + aliasApi.executeSwapAliasResult = ElasticFailure(ElasticError("Failed")) + + // When + aliasApi.swapAlias("old-index", "new-index", "my-alias") + + // Then + verify(mockLogger).error( + "❌ Failed to swap alias 'my-alias' from 'old-index' to 'new-index': Failed" + ) + } + + "not log anything for validation failures" in { + // When + aliasApi.addAlias("INVALID", "my-alias") + + // Then + verify(mockLogger, never).debug(any[String]) + verify(mockLogger, never).info(any[String]) + verify(mockLogger, never).error(any[String]) + } + + "log all emojis correctly" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeAliasExistsResult = ElasticSuccess(true) + aliasApi.executeGetAliasesResult = + ElasticSuccess("""{"my-index":{"aliases":{"my-alias":{}}}}""") + + // When + aliasApi.addAlias("my-index", "my-alias") + aliasApi.aliasExists("my-alias") + aliasApi.getAliases("my-index") + + // Then + verify(mockLogger, atLeast(1)).info(contains("✅")) + } + } + + "validation order" should { + + "validate index before alias in addAlias" in { + // When + val result = aliasApi.addAlias("INVALID", "INVALID-ALIAS") + + // Then + result.error.get.message should include("Invalid index") + result.error.get.message should not include "Invalid alias" + } + + "validate alias after index in addAlias" in { + // When + val result = aliasApi.addAlias("my-index", "INVALID") + + // Then + result.error.get.message should include("Invalid alias") + } + + "validate same name after individual 
names in addAlias" in { + // When + val result = aliasApi.addAlias("my-index", "my-index") + + // Then + result.error.get.message should include("same name") + } + + "validate index existence last in addAlias" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(false) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.error.get.message should include("does not exist") + } + + "validate index before alias in removeAlias" in { + // When + val result = aliasApi.removeAlias("INVALID", "INVALID-ALIAS") + + // Then + result.error.get.message should include("Invalid index") + result.error.get.message should not include "Invalid alias" + } + + "validate oldIndex, newIndex, alias, then same check in swapAlias" in { + // Invalid old index + val result1 = aliasApi.swapAlias("INVALID", "new-index", "my-alias") + result1.error.get.message should include("Invalid old index") + + // Invalid new index + val result2 = aliasApi.swapAlias("old-index", "INVALID", "my-alias") + result2.error.get.message should include("Invalid new index") + + // Invalid alias + val result3 = aliasApi.swapAlias("old-index", "new-index", "INVALID") + result3.error.get.message should include("Invalid alias") + + // Same indexes + val result4 = aliasApi.swapAlias("my-index", "my-index", "my-alias") + result4.error.get.message should include("same") + } + + "not call execute methods when validation fails" in { + // Given + var executeCalled = false + val validatingApi = new AliasApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeAddAlias( + index: String, + alias: String + ): ElasticResult[Boolean] = { + executeCalled = true + ElasticSuccess(true) + } + + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + ElasticSuccess(true) + } + + override private[client] def executeRemoveAlias( + index: String, + alias: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeAliasExists(alias: String): ElasticResult[Boolean] = + ??? + override private[client] def executeGetAliases(index: String): ElasticResult[String] = ??? + override private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? 
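+ // All other execute* members are deliberately stubbed with ??? : this test asserts that validation fails fast, so none of them should ever be invoked.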
+ } + + // When + validatingApi.addAlias("INVALID", "my-alias") + + // Then + executeCalled shouldBe false + } + } + + "JSON parsing" should { + + "correctly parse aliases with filters" in { + // Given + val jsonWithFilters = + """{ + | "my-index": { + | "aliases": { + | "filtered-alias": { + | "filter": { + | "term": { + | "user": "kimchy" + | } + | } + | } + | } + | } + |}""".stripMargin + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonWithFilters) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get should contain("filtered-alias") + } + + "correctly parse aliases with routing" in { + // Given + val jsonWithRouting = + """{ + | "my-index": { + | "aliases": { + | "routed-alias": { + | "routing": "1" + | } + | } + | } + |}""".stripMargin + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonWithRouting) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get should contain("routed-alias") + } + + "correctly parse aliases with search and index routing" in { + // Given + val jsonWithComplexRouting = + """{ + | "my-index": { + | "aliases": { + | "complex-alias": { + | "search_routing": "1,2", + | "index_routing": "2" + | } + | } + | } + |}""".stripMargin + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonWithComplexRouting) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get should contain("complex-alias") + } + + "handle null values in JSON" in { + // Given + val jsonWithNull = """{"my-index":{"aliases":{"my-alias":null}}}""" + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonWithNull) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get should contain("my-alias") + } + + "handle empty aliases object" in { + // Given + val jsonEmpty = """{"my-index":{"aliases":{}}}""" + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonEmpty) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set.empty + } + + "handle malformed JSON gracefully" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("""{"my-index":{"aliases":""") + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isFailure shouldBe true + verify(mockLogger).error(contains("Failed to parse aliases JSON")) + } + + "handle JSON with extra fields" in { + // Given + val jsonWithExtra = + """{ + | "my-index": { + | "aliases": { + | "my-alias": {} + | }, + | "mappings": {}, + | "settings": {} + | } + |}""".stripMargin + aliasApi.executeGetAliasesResult = ElasticSuccess(jsonWithExtra) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get should contain("my-alias") + } + + "handle deeply nested JSON structure" in { + // Given + val deepJson = + """{ + | "my-index": { + | "aliases": { + | "alias1": { + | "filter": { + | "bool": { + | "must": [ + | {"term": {"field1": "value1"}}, + | {"range": {"field2": {"gte": 10}}} + | ] + | } + | } + | } + | } + | } + |}""".stripMargin + aliasApi.executeGetAliasesResult = ElasticSuccess(deepJson) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get should contain("alias1") + } + + "handle JSON with unicode characters" in { + // Given + val unicodeJson = 
"""{"my-index":{"aliases":{"alias-café":{},"alias-日本":{}}}}""" + aliasApi.executeGetAliasesResult = ElasticSuccess(unicodeJson) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get.size shouldBe 2 + } + + "handle large JSON response with many aliases" in { + // Given + val aliases = (1 to 100).map(i => s""""alias-$i":{}""").mkString(",") + val largeJson = s"""{"my-index":{"aliases":{$aliases}}}""" + aliasApi.executeGetAliasesResult = ElasticSuccess(largeJson) + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get.size shouldBe 100 + } + } + + "concurrent operations" should { + + "handle multiple concurrent addAlias calls" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val results = (1 to 5).map(i => aliasApi.addAlias(s"index-$i", s"alias-$i")) + + // Then + results.foreach(_.isSuccess shouldBe true) + verify(mockLogger, times(5)).info(contains("successfully added")) + } + + "handle mixed operations concurrently" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeRemoveAliasResult = ElasticSuccess(true) + aliasApi.executeAliasExistsResult = ElasticSuccess(true) + aliasApi.executeGetAliasesResult = ElasticSuccess("""{"my-index":{"aliases":{}}}""") + + // When + val add = aliasApi.addAlias("index1", "alias1") + val remove = aliasApi.removeAlias("index2", "alias2") + val exists = aliasApi.aliasExists("alias3") + val get = aliasApi.getAliases("index3") + + // Then + add.isSuccess shouldBe true + remove.isSuccess shouldBe true + exists.isSuccess shouldBe true + get.isSuccess shouldBe true + } + + "handle concurrent swapAlias operations" in { + // Given + aliasApi.executeSwapAliasResult = ElasticSuccess(true) + + // When + val results = (1 to 3).map(i => aliasApi.swapAlias(s"old-$i", s"new-$i", s"alias-$i")) + + // Then + results.foreach(_.isSuccess shouldBe true) + verify(mockLogger, times(3)).info(contains("atomic operation")) + } + } + + "real-world scenarios" should { + + "support blue-green deployment pattern" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeSwapAliasResult = ElasticSuccess(true) + aliasApi.executeRemoveAliasResult = ElasticSuccess(true) + + // When - Initial setup (blue environment) + val setupBlue = aliasApi.addAlias("products-blue", "products") + + // When - Deploy green environment + val swapToGreen = aliasApi.swapAlias("products-blue", "products-green", "products") + + // When - Rollback if needed + val rollback = aliasApi.swapAlias("products-green", "products-blue", "products") + + // Then + setupBlue.isSuccess shouldBe true + swapToGreen.isSuccess shouldBe true + rollback.isSuccess shouldBe true + } + + "support time-based index pattern" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeGetAliasesResult = ElasticSuccess( + """{"logs-2024-01":{"aliases":{"logs-current":{},"logs-january":{}}}}""" + ) + + // When + val addCurrent = aliasApi.addAlias("logs-2024-01", "logs-current") + val addMonth = aliasApi.addAlias("logs-2024-01", "logs-january") + val getAliases = aliasApi.getAliases("logs-2024-01") + + // Then + addCurrent.isSuccess shouldBe true + 
addMonth.isSuccess shouldBe true + getAliases.get should contain allOf ("logs-current", "logs-january") + } + + "support filtered alias for multi-tenancy" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When - Create filtered aliases for different tenants + val tenant1 = aliasApi.addAlias("shared-index", "tenant1-data") + val tenant2 = aliasApi.addAlias("shared-index", "tenant2-data") + + // Then + tenant1.isSuccess shouldBe true + tenant2.isSuccess shouldBe true + } + + "support read-write split pattern" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val writeAlias = aliasApi.addAlias("products-2024", "products-write") + val readAlias1 = aliasApi.addAlias("products-2024", "products-read") + val readAlias2 = aliasApi.addAlias("products-2023", "products-read") + + // Then + writeAlias.isSuccess shouldBe true + readAlias1.isSuccess shouldBe true + readAlias2.isSuccess shouldBe true + } + + "support canary deployment" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeSwapAliasResult = ElasticSuccess(true) + + // When - Route 10% traffic to new version + val addCanary = aliasApi.addAlias("products-v2", "products-canary") + + // When - Full rollout + val fullRollout = aliasApi.swapAlias("products-v1", "products-v2", "products") + + // Then + addCanary.isSuccess shouldBe true + fullRollout.isSuccess shouldBe true + } + + "support index lifecycle management" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + aliasApi.executeSwapAliasResult = ElasticSuccess(true) + aliasApi.executeRemoveAliasResult = ElasticSuccess(true) + + // When - Hot tier + val hot = aliasApi.addAlias("logs-2024-01-01", "logs-hot") + + // When - Move to warm tier + val warm = aliasApi.swapAlias("logs-2024-01-01", "logs-2023-12-31", "logs-hot") + + // When - Archive + val archive = aliasApi.removeAlias("logs-2023-12-31", "logs-hot") + + // Then + hot.isSuccess shouldBe true + warm.isSuccess shouldBe true + archive.isSuccess shouldBe true + } + } + + "performance considerations" should { + + "handle rapid consecutive operations" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val start = System.currentTimeMillis() + (1 to 100).foreach(i => aliasApi.addAlias(s"index-$i", s"alias-$i")) + val duration = System.currentTimeMillis() - start + + // Then - Should complete reasonably fast (validation overhead only) + duration should be < 5000L // 5 seconds + } + + "not accumulate memory with repeated calls" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When - Multiple iterations + (1 to 10).foreach { iteration => + (1 to 100).foreach(i => aliasApi.addAlias(s"index-$i", s"alias-$i")) + } + + // Then - Should not throw OutOfMemoryError + succeed + } + } + + "error messages" should { + + "be descriptive for validation errors" in { + val result = aliasApi.addAlias("INVALID", "my-alias") + result.error.get.message should include("Invalid index") + result.error.get.message should include("lowercase") + } + + "include operation context" in { + val result = aliasApi.addAlias("INVALID", "my-alias") 
+ result.error.get.operation shouldBe Some("addAlias") + } + + "include status codes" in { + val result = aliasApi.addAlias("INVALID", "my-alias") + result.error.get.statusCode shouldBe Some(400) + } + + "be clear about which parameter is invalid" in { + val result1 = aliasApi.addAlias("INVALID", "my-alias") + result1.error.get.message should include("Invalid index") + + val result2 = aliasApi.addAlias("my-index", "INVALID") + result2.error.get.message should include("Invalid alias") + + val result3 = aliasApi.swapAlias("INVALID", "new-index", "my-alias") + result3.error.get.message should include("Invalid old index") + + val result4 = aliasApi.swapAlias("old-index", "INVALID", "my-alias") + result4.error.get.message should include("Invalid new index") + } + + "preserve original error messages from execute methods" in { + // Given + val originalError = ElasticError("Custom error message", statusCode = Some(500)) + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticFailure(originalError) + + // When + val result = aliasApi.addAlias("my-index", "my-alias") + + // Then + result.error.get.message shouldBe "Custom error message" + } + } + + "boundary conditions" should { + + "handle minimum valid index name (1 char)" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("a", "b") + + // Then + result.isSuccess shouldBe true + } + + "reject index name exceeding 255 characters" in { + // Given + val tooLong = "a" * 256 + + // When + val result = aliasApi.addAlias(tooLong, "my-alias") + + // Then + result.isFailure shouldBe true + } + + "handle alias at exactly 255 characters" in { + // Given + val maxLength = "a" * 255 + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", maxLength) + + // Then + result.isSuccess shouldBe true + } + + "handle empty set from getAliases" in { + // Given + aliasApi.executeGetAliasesResult = ElasticSuccess("""{"my-index":{"aliases":{}}}""") + + // When + val result = aliasApi.getAliases("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe Set.empty + result.get.isEmpty shouldBe true + } + + "handle single character alias names" in { + // Given + aliasApi.executeIndexExistsResult = ElasticSuccess(true) + aliasApi.executeAddAliasResult = ElasticSuccess(true) + + // When + val result = aliasApi.addAlias("my-index", "a") + + // Then + result.isSuccess shouldBe true + } + } + } +} diff --git a/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala new file mode 100644 index 00000000..c654e379 --- /dev/null +++ b/core/src/test/scala/app/softnetwork/elastic/client/ElasticConversionSpec.scala @@ -0,0 +1,640 @@ +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.sql.Identifier +import app.softnetwork.elastic.sql.function.aggregate.ArrayAgg +import app.softnetwork.elastic.sql.query.{OrderBy, SQLAggregation} +import org.json4s.ext.{JavaTimeSerializers, JavaTypesSerializers, JodaTimeSerializers} +import org.json4s.jackson.Serialization +import org.json4s.{Formats, NoTypeHints} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import java.time.ZonedDateTime +import scala.util.{Failure, Success} + +class 
ElasticConversionSpec extends AnyFlatSpec with Matchers with ElasticConversion { + + implicit val formats: Formats = + Serialization.formats(NoTypeHints) ++ + JodaTimeSerializers.all ++ + JavaTypesSerializers.all ++ + JavaTimeSerializers.all + + "elastic conversion" should "parse simple hits" in { + val results = + """{ + | "took": 5, + | "timed_out": false, + | "hits": { + | "total": { "value": 2, "relation": "eq" }, + | "max_score": 1.0, + | "hits": [ + | { + | "_index": "products", + | "_id": "1", + | "_score": 1.0, + | "_source": { + | "name": "Laptop", + | "price": 999.99, + | "category": "Electronics", + | "tags": ["computer", "portable"] + | } + | }, + | { + | "_index": "products", + | "_id": "2", + | "_score": 0.8, + | "_source": { + | "name": "Mouse", + | "price": 29.99, + | "category": "Electronics" + | } + | } + | ] + | } + |}""".stripMargin + + parseResponse(ElasticResponse("", results, Map.empty, Map.empty)) match { + case Success(rows) => + rows.foreach(println) + // Map(name -> Laptop, price -> 999.99, category -> Electronics, tags -> List(computer, portable), _id -> 1, _index -> products, _score -> 1.0) + // Map(name -> Mouse, price -> 29.99, category -> Electronics, _id -> 2, _index -> products, _score -> 0.8) + case Failure(error) => + throw error + } + } + it should "parse aggregations with top hits" in { + val results = """{ + | "took": 10, + | "hits": { "total": { "value": 100 }, "hits": [] }, + | "aggregations": { + | "category": { + | "doc_count_error_upper_bound": 0, + | "sum_other_doc_count": 0, + | "buckets": [ + | { + | "key": "Electronics", + | "doc_count": 50, + | "avg_price": { + | "value": 450.5 + | }, + | "max_price": { + | "value": 999.99 + | }, + | "top_products": { + | "hits": { + | "total": { "value": 50 }, + | "max_score": 1.0, + | "hits": [ + | { + | "_id": "1", + | "_score": 1.0, + | "_source": { + | "name": "Laptop", + | "price": 999.99, + | "stock": 15 + | } + | }, + | { + | "_id": "2", + | "_score": 0.95, + | "_source": { + | "name": "Phone", + | "price": 699.99, + | "stock": 25 + | } + | } + | ] + | } + | } + | }, + | { + | "key": "Books", + | "doc_count": 30, + | "avg_price": { + | "value": 25.0 + | }, + | "max_price": { + | "value": 45.0 + | }, + | "top_products": { + | "hits": { + | "hits": [ + | { + | "_id": "3", + | "_score": null, + | "_source": { + | "name": "Programming Book", + | "price": 45.0, + | "stock": 50 + | } + | } + | ] + | } + | } + | } + | ] + | } + | } + |}""".stripMargin + + parseResponse( + ElasticResponse( + "", + results, + Map.empty, + Map( + "top_products" -> ClientAggregation( + "top_products", + aggType = AggregationType.ArrayAgg, + distinct = false + ) + ) + ) + ) match { + case Success(rows) => + rows.foreach(println) + //HashMap(top_products -> List(HashMap(_score -> 1.0, stock -> 15, name -> Laptop, _id -> 1, price -> 999.99), HashMap(_score -> 0.95, stock -> 25, name -> Phone, _id -> 2, price -> 699.99)), max_price -> 999.99, category -> Electronics, avg_price -> 450.5, category_doc_count -> 50) + //HashMap(top_products -> List(HashMap(_score -> 0.0, stock -> 50, name -> Programming Book, _id -> 3, price -> 45.0)), max_price -> 45.0, category -> Books, avg_price -> 25.0, category_doc_count -> 30) + val products = rows.map(row => convertTo[Products](row)) + products.foreach(println) + // Products(Electronics,List(Product(Laptop,999.99,15,None), Product(Phone,699.99,25,None)),450.5) + // Products(Books,List(Product(Programming Book,45.0,50,None)),25.0) + products.size shouldBe 2 + products.count(_.category == 
"Electronics") shouldBe 1 + products.count(_.category == "Books") shouldBe 1 + products.minBy(_.avg_price).avg_price shouldBe 25.0 + products.maxBy(_.avg_price).avg_price shouldBe 450.5 + products.flatMap( + _.top_products.map(_.name) + ) should contain allOf ("Laptop", "Phone", "Programming Book") + case Failure(error) => + throw error + } + } + + it should "parse nested aggregations" in { + val results = """{ + | "aggregations": { + | "country": { + | "buckets": [ + | { + | "key": "France", + | "doc_count": 100, + | "city": { + | "buckets": [ + | { + | "key": "Paris", + | "doc_count": 60, + | "product": { + | "buckets": [ + | { + | "key": "Laptop", + | "doc_count": 30, + | "total_sales": { "value": 29997.0 }, + | "avg_price": { "value": 999.9 } + | }, + | { + | "key": "Phone", + | "doc_count": 30, + | "total_sales": { "value": 20997.0 }, + | "avg_price": { "value": 699.9 } + | } + | ] + | } + | }, + | { + | "key": "Lyon", + | "doc_count": 40, + | "product": { + | "buckets": [ + | { + | "key": "Tablet", + | "doc_count": 40, + | "total_sales": { "value": 15996.0 }, + | "avg_price": { "value": 399.9 } + | } + | ] + | } + | } + | ] + | } + | }, + | { + | "key": "Germany", + | "doc_count": 80, + | "city": { + | "buckets": [ + | { + | "key": "Berlin", + | "doc_count": 80, + | "product": { + | "buckets": [ + | { + | "key": "Mouse", + | "doc_count": 80, + | "total_sales": { "value": 2399.2 }, + | "avg_price": { "value": 29.99 } + | } + | ] + | } + | } + | ] + | } + | } + | ] + | } + | } + |}""".stripMargin + parseResponse(ElasticResponse("", results, Map.empty, Map.empty)) match { + case Success(rows) => + rows.foreach(println) + // Map(country -> France, country_doc_count -> 100, city -> Paris, city_doc_count -> 60, product -> Laptop, product_doc_count -> 30, total_sales -> 29997.0, avg_price -> 999.9) + // Map(country -> France, country_doc_count -> 100, city -> Paris, city_doc_count -> 60, product -> Phone, product_doc_count -> 30, total_sales -> 20997.0, avg_price -> 699.9) + // Map(country -> France, country_doc_count -> 100, city -> Lyon, city_doc_count -> 40, product -> Tablet, product_doc_count -> 40, total_sales -> 15996.0, avg_price -> 399.9) + // Map(country -> Germany, country_doc_count -> 80, city -> Berlin, city_doc_count -> 80, product -> Mouse, product_doc_count -> 80, total_sales -> 2399.2, avg_price -> 29.99) + val sales = rows.map(row => convertTo[Sales](row)) + sales.foreach(println) + // Sales(France,Paris,Laptop,29997.0,999.9) + // Sales(France,Paris,Phone,20997.0,699.9) + // Sales(France,Lyon,Tablet,15996.0,399.9) + // Sales(Germany,Berlin,Mouse,2399.2,29.99) + sales.size shouldBe 4 + sales.count(_.country == "France") shouldBe 3 + sales.count(_.country == "Germany") shouldBe 1 + sales.map(_.city) should contain allOf ("Paris", "Lyon", "Berlin") + sales + .filter(_.country == "France") + .map(_.product) should contain allOf ("Laptop", "Phone", "Tablet") + sales.filter(_.country == "Germany").map(_.product) should contain only "Mouse" + case Failure(error) => + throw error + } + } + + it should "parse date histogram aggregations" in { + val results = """{ + | "aggregations": { + | "sales_over_time": { + | "buckets": [ + | { + | "key_as_string": "2024-01-01T00:00:00.000Z", + | "key": 1704067200000, + | "doc_count": 100, + | "total_revenue": { "value": 50000.0 } + | }, + | { + | "key_as_string": "2024-02-01T00:00:00.000Z", + | "key": 1706745600000, + | "doc_count": 150, + | "total_revenue": { "value": 75000.0 } + | } + | ] + | } + | } + |}""".stripMargin + 
parseResponse(ElasticResponse("", results, Map.empty, Map.empty)) match { + case Success(rows) => + rows.foreach(println) + // Map(sales_over_time -> 2024-01-01T00:00:00.000Z, sales_over_time_doc_count -> 100, total_revenue -> 50000.0) + // Map(sales_over_time -> 2024-02-01T00:00:00.000Z, sales_over_time_doc_count -> 150, total_revenue -> 75000.0) + val history = rows.map(row => convertTo[SalesHistory](row)) + history.foreach(println) + // SalesHistory(2024-01-01T00:00,50000.0) + // SalesHistory(2024-02-01T00:00,75000.0) + history.size shouldBe 2 + history.map(_.sales_over_time.getMonthValue) should contain allOf (1, 2) + history.find(_.sales_over_time.getMonthValue == 1).get.total_revenue shouldBe 50000.0 + history.find(_.sales_over_time.getMonthValue == 2).get.total_revenue shouldBe 75000.0 + case Failure(error) => + throw error + } + } + + it should "parse aggregations with FIRST, LAST and ARRAY_AGG" in { + val results = """{ + | "took": 45, + | "timed_out": false, + | "_shards": { + | "total": 5, + | "successful": 5, + | "skipped": 0, + | "failed": 0 + | }, + | "hits": { + | "total": { + | "value": 150, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [] + | }, + | "aggregations": { + | "dept": { + | "doc_count_error_upper_bound": 0, + | "sum_other_doc_count": 0, + | "buckets": [ + | { + | "key": "Engineering", + | "doc_count": 45, + | "cnt": { + | "value": 38 + | }, + | "first_salary": { + | "hits": { + | "total": { + | "value": 45, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [ + | { + | "_index": "employees", + | "_type": "_doc", + | "_id": "1", + | "_score": null, + | "_source": { + | "salary": 55000, + | "firstName": "John" + | }, + | "sort": [ + | 1420070400000 + | ] + | } + | ] + | } + | }, + | "last_salary": { + | "hits": { + | "total": { + | "value": 45, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [ + | { + | "_index": "employees", + | "_type": "_doc", + | "_id": "45", + | "_score": null, + | "_source": { + | "salary": 95000, + | "firstName": "Sarah" + | }, + | "sort": [ + | 1672531200000 + | ] + | } + | ] + | } + | }, + | "employees": { + | "hits": { + | "total": { + | "value": 45, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [ + | { + | "_index": "employees", + | "_type": "_doc", + | "_id": "1", + | "_score": null, + | "_source": { + | "name": "John Doe" + | }, + | "sort": [ + | 1420070400000, + | 95000 + | ] + | }, + | { + | "_index": "employees", + | "_type": "_doc", + | "_id": "2", + | "_score": null, + | "_source": { + | "name": "Jane Smith" + | }, + | "sort": [ + | 1425254400000, + | 88000 + | ] + | } + | ] + | } + | } + | }, + | { + | "key": "Sales", + | "doc_count": 32, + | "cnt": { + | "value": 28 + | }, + | "first_salary": { + | "hits": { + | "total": { + | "value": 32, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [ + | { + | "_index": "employees", + | "_type": "_doc", + | "_id": "50", + | "_score": null, + | "_source": { + | "salary": 48000, + | "firstName": "Michael" + | }, + | "sort": [ + | 1388534400000 + | ] + | } + | ] + | } + | }, + | "last_salary": { + | "hits": { + | "total": { + | "value": 32, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [ + | { + | "_index": "employees", + | "_type": "_doc", + | "_id": "82", + | "_score": null, + | "_source": { + | "salary": 72000, + | "firstName": "Emily" + | }, + | "sort": [ + | 1667260800000 + | ] + | } + | ] + | } + | }, + | "employees": { + | "hits": { + | "total": { + | "value": 32, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [ + | { + | "_index": 
"employees", + | "_type": "_doc", + | "_id": "50", + | "_score": null, + | "_source": { + | "name": "Michael Brown" + | }, + | "sort": [ + | 1388534400000, + | 72000 + | ] + | } + | ] + | } + | } + | }, + | { + | "key": "Marketing", + | "doc_count": 28, + | "cnt": { + | "value": 25 + | }, + | "first_salary": { + | "hits": { + | "total": { + | "value": 28, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [ + | { + | "_index": "employees", + | "_type": "_doc", + | "_id": "100", + | "_score": null, + | "_source": { + | "salary": 52000, + | "firstName": "David" + | }, + | "sort": [ + | 1404172800000 + | ] + | } + | ] + | } + | }, + | "last_salary": { + | "hits": { + | "total": { + | "value": 28, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [ + | { + | "_index": "employees", + | "_type": "_doc", + | "_id": "128", + | "_score": null, + | "_source": { + | "salary": 78000, + | "firstName": "Lisa" + | }, + | "sort": [ + | 1672531200000 + | ] + | } + | ] + | } + | }, + | "employees": { + | "hits": { + | "total": { + | "value": 28, + | "relation": "eq" + | }, + | "max_score": null, + | "hits": [ + | { + | "_index": "employees", + | "_type": "_doc", + | "_id": "100", + | "_score": null, + | "_source": { + | "name": "David Wilson" + | }, + | "sort": [ + | 1404172800000, + | 78000 + | ] + | } + | ] + | } + | } + | } + | ] + | } + | }, + | "fields": { + | "hire_date": [] + | } + |}""".stripMargin + + parseResponse( + ElasticResponse( + "", + results, + Map.empty, + Map( + "employees" -> ClientAggregation( + aggName = "employees", + aggType = AggregationType.ArrayAgg, + distinct = false + ) + ) + ) + ) match { + case Success(rows) => + rows.foreach(println) + //HashMap(dept_doc_count -> 45, last_salary -> HashMap(_score -> 0.0, salary -> 95000, firstName -> Sarah, _id -> 45, _index -> employees), first_salary -> HashMap(_score -> 0.0, salary -> 55000, firstName -> John, _id -> 1, _index -> employees), cnt -> 38, employees -> List(Map(name -> John Doe, _id -> 1, _index -> employees, _score -> 0.0), Map(name -> Jane Smith, _id -> 2, _index -> employees, _score -> 0.0)), dept -> Engineering) + //HashMap(dept_doc_count -> 32, last_salary -> HashMap(_score -> 0.0, salary -> 72000, firstName -> Emily, _id -> 82, _index -> employees), first_salary -> HashMap(_score -> 0.0, salary -> 48000, firstName -> Michael, _id -> 50, _index -> employees), cnt -> 28, employees -> Map(name -> Michael Brown, _id -> 50, _index -> employees, _score -> 0.0), dept -> Sales) + //HashMap(dept_doc_count -> 28, last_salary -> HashMap(_score -> 0.0, salary -> 78000, firstName -> Lisa, _id -> 128, _index -> employees), first_salary -> HashMap(_score -> 0.0, salary -> 52000, firstName -> David, _id -> 100, _index -> employees), cnt -> 25, employees -> Map(name -> David Wilson, _id -> 100, _index -> employees, _score -> 0.0), dept -> Marketing) + case Failure(error) => + throw error + } + } +} + +case class Products(category: String, top_products: List[Product], avg_price: Double) + +case class Product( + name: String, + price: Double, + stock: Int, + tags: Option[List[String]] = None +) + +case class Sales( + country: String, + city: String, + product: String, + total_sales: Double, + avg_price: Double +) + +case class SalesHistory( + sales_over_time: ZonedDateTime, + total_revenue: Double +) diff --git a/core/src/test/scala/app/softnetwork/elastic/client/ElasticsearchVersionSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/ElasticsearchVersionSpec.scala new file mode 100644 index 
00000000..06ea1384 --- /dev/null +++ b/core/src/test/scala/app/softnetwork/elastic/client/ElasticsearchVersionSpec.scala @@ -0,0 +1,75 @@ +package app.softnetwork.elastic.client + +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.matchers.should.Matchers + +class ElasticsearchVersionSpec extends AnyWordSpec with Matchers { + + "ElasticsearchVersion.parse" should { + "parse valid version strings" in { + ElasticsearchVersion.parse("7.10.2") shouldBe (7, 10, 2) + ElasticsearchVersion.parse("8.11.0") shouldBe (8, 11, 0) + ElasticsearchVersion.parse("6.8.23") shouldBe (6, 8, 23) + ElasticsearchVersion.parse("7.0") shouldBe (7, 0, 0) + ElasticsearchVersion.parse("8") shouldBe (8, 0, 0) + } + + "throw exception for invalid versions" in { + an[IllegalArgumentException] should be thrownBy { + ElasticsearchVersion.parse("invalid") + } + } + } + + "ElasticsearchVersion.isAtLeast" should { + "correctly compare versions" in { + // ES 7.10.2 + ElasticsearchVersion.isAtLeast("7.10.2", 7, 10) shouldBe true + ElasticsearchVersion.isAtLeast("7.10.2", 7, 9) shouldBe true + ElasticsearchVersion.isAtLeast("7.10.2", 7, 11) shouldBe false + ElasticsearchVersion.isAtLeast("7.10.2", 8, 0) shouldBe false + ElasticsearchVersion.isAtLeast("7.10.2", 6, 0) shouldBe true + + // ES 8.11.0 + ElasticsearchVersion.isAtLeast("8.11.0", 7, 10) shouldBe true + ElasticsearchVersion.isAtLeast("8.11.0", 8, 0) shouldBe true + ElasticsearchVersion.isAtLeast("8.11.0", 8, 11) shouldBe true + ElasticsearchVersion.isAtLeast("8.11.0", 8, 12) shouldBe false + + // ES 6.8.23 + ElasticsearchVersion.isAtLeast("6.8.23", 7, 10) shouldBe false + ElasticsearchVersion.isAtLeast("6.8.23", 6, 8) shouldBe true + ElasticsearchVersion.isAtLeast("6.8.23", 6, 9) shouldBe false + } + } + + "ElasticsearchVersion.supportsPit" should { + "return true for ES >= 7.10" in { + ElasticsearchVersion.supportsPit("7.10.0") shouldBe true + ElasticsearchVersion.supportsPit("7.10.2") shouldBe true + ElasticsearchVersion.supportsPit("7.17.0") shouldBe true + ElasticsearchVersion.supportsPit("8.0.0") shouldBe true + ElasticsearchVersion.supportsPit("8.11.0") shouldBe true + } + + "return false for ES < 7.10" in { + ElasticsearchVersion.supportsPit("7.9.3") shouldBe false + ElasticsearchVersion.supportsPit("7.0.0") shouldBe false + ElasticsearchVersion.supportsPit("6.8.23") shouldBe false + } + } + + "ElasticsearchVersion.isEs8OrHigher" should { + "return true for ES >= 8.0" in { + ElasticsearchVersion.isEs8OrHigher("8.0.0") shouldBe true + ElasticsearchVersion.isEs8OrHigher("8.11.0") shouldBe true + ElasticsearchVersion.isEs8OrHigher("9.0.0") shouldBe true + } + + "return false for ES < 8.0" in { + ElasticsearchVersion.isEs8OrHigher("7.17.0") shouldBe false + ElasticsearchVersion.isEs8OrHigher("7.10.2") shouldBe false + ElasticsearchVersion.isEs8OrHigher("6.8.23") shouldBe false + } + } +} diff --git a/core/src/test/scala/app/softnetwork/elastic/client/IndicesApiSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/IndicesApiSpec.scala new file mode 100644 index 00000000..cfd8cc50 --- /dev/null +++ b/core/src/test/scala/app/softnetwork/elastic/client/IndicesApiSpec.scala @@ -0,0 +1,1274 @@ +package app.softnetwork.elastic.client + +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.BeforeAndAfterEach +import org.mockito.MockitoSugar +import org.mockito.ArgumentMatchersSugar +import org.slf4j.Logger +import app.softnetwork.elastic.client.result._ + +/** Unit tests for IndicesApi + */ 
+class IndicesApiSpec + extends AnyWordSpec + with Matchers + with BeforeAndAfterEach + with MockitoSugar + with ArgumentMatchersSugar { + + // Mock logger + val mockLogger: Logger = mock[Logger] + + // Concrete implementation for testing + class TestIndicesApi extends IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + // Control variables for each operation + var executeCreateIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeDeleteIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeCloseIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeOpenIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeReindexResult: ElasticResult[(Boolean, Option[Long])] = ElasticSuccess( + (true, Some(100L)) + ) + var executeIndexExistsResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeRefreshResult: ElasticResult[Boolean] = ElasticSuccess(true) + + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = { + executeCreateIndexResult + } + + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = { + executeDeleteIndexResult + } + + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + executeCloseIndexResult + } + + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = { + executeOpenIndexResult + } + + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = { + executeReindexResult + } + + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + executeIndexExistsResult + } + + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = { + executeRefreshResult + } + } + + var indicesApi: TestIndicesApi = _ + + override def beforeEach(): Unit = { + super.beforeEach() + indicesApi = new TestIndicesApi() + reset(mockLogger) + } + + "IndicesApi" should { + + "defaultSettings" should { + + "contain valid JSON" in { + // When + val settings = indicesApi.defaultSettings + + // Then + settings should not be empty + settings should include("index") + settings should include("max_ngram_diff") + settings should include("ngram_analyzer") + settings should include("ngram_tokenizer") + } + + "be valid JSON parseable" in { + // Given + import org.json4s.jackson.JsonMethods._ + + // When & Then + noException should be thrownBy { + parse(indicesApi.defaultSettings) + } + } + + "contain ngram configuration" in { + // When + val settings = indicesApi.defaultSettings + + // Then + settings should include("min_gram") + settings should include("max_gram") + settings should include("\"max_ngram_diff\": \"20\"") + } + + "contain analyzer configuration" in { + // When + val settings = indicesApi.defaultSettings + + // Then + settings should include("analyzer") + settings should include("search_analyzer") + settings should include("lowercase") + settings should include("asciifolding") + } + + "contain mapping limits" in { + // When + val settings = indicesApi.defaultSettings + + // Then + settings should include("total_fields") + settings should include("\"limit\" : \"2000\"") + } + } + + "createIndex" should { + + "successfully create index with default settings" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + + // When + val result = 
indicesApi.createIndex("my-index") + + // Then + result shouldBe ElasticSuccess(true) + result.isSuccess shouldBe true + + verify(mockLogger).info(contains("Creating index 'my-index' with settings:")) + verify(mockLogger).info("✅ Index 'my-index' created successfully") + verify(mockLogger, never).error(any[String]) + } + + "successfully create index with custom settings" in { + // Given + val customSettings = """{"index": {"number_of_shards": 3}}""" + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + + // When + val result = indicesApi.createIndex("my-index", customSettings) + + // Then + result.isSuccess shouldBe true + + verify(mockLogger).info(contains("Creating index 'my-index' with settings:")) + verify(mockLogger).info(contains(customSettings)) + verify(mockLogger).info("✅ Index 'my-index' created successfully") + } + + "return false when index creation returns false" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(false) + + // When + val result = indicesApi.createIndex("my-index") + + // Then + result shouldBe ElasticSuccess(false) + result.get shouldBe false + + verify(mockLogger).info("✅ Index 'my-index' not created") + } + + "handle creation failure" in { + // Given + val error = ElasticError("Index already exists", statusCode = Some(400)) + indicesApi.executeCreateIndexResult = ElasticFailure(error) + + // When + val result = indicesApi.createIndex("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Index already exists" + + verify(mockLogger).error("❌ Failed to create index 'my-index': Index already exists") + } + + "reject invalid index name" in { + // When + val result = indicesApi.createIndex("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.message should include("lowercase") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("createIndex") + + verify(mockLogger, never).info(any[String]) + } + + "reject empty index name" in { + // When + val result = indicesApi.createIndex("") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.message should include("cannot be empty") + } + + "reject invalid JSON settings" in { + // Given + val invalidJson = """{"index": invalid json}""" + + // When + val result = indicesApi.createIndex("my-index", invalidJson) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid settings") + result.error.get.message should include("Invalid JSON") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("createIndex") + + verify(mockLogger, never).info(any[String]) + } + + "reject empty settings" in { + // When + val result = indicesApi.createIndex("my-index", "") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid settings") + result.error.get.message should include("cannot be empty") + } + + "reject null settings" in { + // When + val result = indicesApi.createIndex("my-index", null) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid settings") + } + + "handle network error during creation" in { + // Given + val error = ElasticError( + "Connection timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + indicesApi.executeCreateIndexResult = ElasticFailure(error) + + // When + val 
result = indicesApi.createIndex("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + + verify(mockLogger).error(contains("Connection timeout")) + } + + "validate index name before settings" in { + // Given + val invalidJson = """invalid""" + + // When - Invalid index name should fail first + val result = indicesApi.createIndex("INVALID", invalidJson) + + // Then + result.error.get.message should include("Invalid index") + result.error.get.message should include("lowercase") + } + } + + "deleteIndex" should { + + "successfully delete existing index" in { + // Given + indicesApi.executeDeleteIndexResult = ElasticSuccess(true) + + // When + val result = indicesApi.deleteIndex("my-index") + + // Then + result shouldBe ElasticSuccess(true) + + verify(mockLogger).info("Deleting index 'my-index'") + verify(mockLogger).info("✅ Index 'my-index' deleted successfully") + } + + "return false when deletion returns false" in { + // Given + indicesApi.executeDeleteIndexResult = ElasticSuccess(false) + + // When + val result = indicesApi.deleteIndex("my-index") + + // Then + result shouldBe ElasticSuccess(false) + + verify(mockLogger).info("✅ Index 'my-index' not deleted") + } + + "handle deletion failure" in { + // Given + val error = ElasticError("Index not found", statusCode = Some(404)) + indicesApi.executeDeleteIndexResult = ElasticFailure(error) + + // When + val result = indicesApi.deleteIndex("my-index") + + // Then + result.isFailure shouldBe true + + verify(mockLogger).error("❌ Failed to delete index 'my-index': Index not found") + } + + "reject invalid index name" in { + // When + val result = indicesApi.deleteIndex("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.operation shouldBe Some("deleteIndex") + + verify(mockLogger, never).info(any[String]) + } + + "reject empty index name" in { + // When + val result = indicesApi.deleteIndex("") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("cannot be empty") + } + } + + "closeIndex" should { + + "successfully close index" in { + // Given + indicesApi.executeCloseIndexResult = ElasticSuccess(true) + + // When + val result = indicesApi.closeIndex("my-index") + + // Then + result shouldBe ElasticSuccess(true) + + verify(mockLogger).info("Closing index 'my-index'") + verify(mockLogger).info("✅ Index 'my-index' closed successfully") + } + + "return false when close returns false" in { + // Given + indicesApi.executeCloseIndexResult = ElasticSuccess(false) + + // When + val result = indicesApi.closeIndex("my-index") + + // Then + result shouldBe ElasticSuccess(false) + + verify(mockLogger).info("✅ Index 'my-index' not closed") + } + + "handle close failure" in { + // Given + val error = ElasticError("Cannot close index", statusCode = Some(500)) + indicesApi.executeCloseIndexResult = ElasticFailure(error) + + // When + val result = indicesApi.closeIndex("my-index") + + // Then + result.isFailure shouldBe true + + verify(mockLogger).error("❌ Failed to close index 'my-index': Cannot close index") + } + + "reject invalid index name" in { + // When + val result = indicesApi.closeIndex("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.operation shouldBe Some("closeIndex") + } + } + + "openIndex" should { + + "successfully open index" in { + // Given + indicesApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = indicesApi.openIndex("my-index") 
+ + // Then + result shouldBe ElasticSuccess(true) + + verify(mockLogger).info("Opening index 'my-index'") + verify(mockLogger).info("✅ Index 'my-index' opened successfully") + } + + "return false when open returns false" in { + // Given + indicesApi.executeOpenIndexResult = ElasticSuccess(false) + + // When + val result = indicesApi.openIndex("my-index") + + // Then + result shouldBe ElasticSuccess(false) + + verify(mockLogger).info("✅ Index 'my-index' not opened") + } + + "handle open failure" in { + // Given + val error = ElasticError("Cannot open index", statusCode = Some(500)) + indicesApi.executeOpenIndexResult = ElasticFailure(error) + + // When + val result = indicesApi.openIndex("my-index") + + // Then + result.isFailure shouldBe true + + verify(mockLogger).error("❌ Failed to open index 'my-index': Cannot open index") + } + + "reject invalid index name" in { + // When + val result = indicesApi.openIndex("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.operation shouldBe Some("openIndex") + } + } + + "indexExists" should { + + "return true when index exists" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + + // When + val result = indicesApi.indexExists("my-index") + + // Then + result shouldBe ElasticSuccess(true) + + verify(mockLogger).debug("Checking if index 'my-index' exists") + verify(mockLogger).debug("✅ Index 'my-index' exists") + } + + "return false when index does not exist" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(false) + + // When + val result = indicesApi.indexExists("my-index") + + // Then + result shouldBe ElasticSuccess(false) + + verify(mockLogger).debug("✅ Index 'my-index' does not exist") + } + + "handle existence check failure" in { + // Given + val error = ElasticError("Connection error", statusCode = Some(503)) + indicesApi.executeIndexExistsResult = ElasticFailure(error) + + // When + val result = indicesApi.indexExists("my-index") + + // Then + result.isFailure shouldBe true + + verify(mockLogger).error( + "❌ Failed to check existence of index 'my-index': Connection error" + ) + } + + "reject invalid index name" in { + // When + val result = indicesApi.indexExists("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.operation shouldBe Some("indexExists") + } + } + + "reindex" should { + + "successfully reindex with refresh" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((true, Some(100L))) + indicesApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = indicesApi.reindex("source-index", "target-index", refresh = true) + + // Then + result.isSuccess shouldBe true + result.get shouldBe ((true, Some(100L))) + + verify(mockLogger).info("Reindexing from 'source-index' to 'target-index' (refresh=true)") + verify(mockLogger).info( + "✅ Reindex from 'source-index' to 'target-index' succeeded (100 documents)" + ) + verify(mockLogger).debug("✅ Target index 'target-index' refreshed") + } + + "successfully reindex without refresh" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((true, Some(50L))) + + // When + val result = indicesApi.reindex("source-index", "target-index", refresh = false) + + // Then + result.isSuccess shouldBe true + result.get shouldBe ((true, Some(50L))) + + verify(mockLogger).info("Reindexing from 'source-index' to 'target-index' (refresh=false)") + 
verify(mockLogger).info( + "✅ Reindex from 'source-index' to 'target-index' succeeded (50 documents)" + ) + verify(mockLogger, never).debug(contains("refreshed")) + } + + "successfully reindex without document count" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((true, None)) + + // When + val result = indicesApi.reindex("source-index", "target-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe ((true, None)) + + verify(mockLogger).info("✅ Reindex from 'source-index' to 'target-index' succeeded") + } + + "handle reindex failure" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + val error = ElasticError("Reindex failed", statusCode = Some(500)) + indicesApi.executeReindexResult = ElasticFailure(error) + + // When + val result = indicesApi.reindex("source-index", "target-index") + + // Then + result.isFailure shouldBe true + + verify(mockLogger).error("Reindex failed for index 'target-index': Reindex failed") + } + + "handle reindex returning false" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((false, None)) + + // When + val result = indicesApi.reindex("source-index", "target-index") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Reindex failed for index 'target-index'") + result.error.get.operation shouldBe Some("reindex") + } + + "succeed even if refresh fails after successful reindex" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((true, Some(100L))) + indicesApi.executeRefreshResult = ElasticFailure(ElasticError("Refresh failed")) + + // When + val result = indicesApi.reindex("source-index", "target-index", refresh = true) + + // Then + result.isSuccess shouldBe true + result.get shouldBe ((true, Some(100L))) + + verify(mockLogger).warn(contains("Refresh failed but reindex succeeded")) + } + + "reject when source and target are the same" in { + // When + val result = indicesApi.reindex("same-index", "same-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Source and target index cannot be the same" + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("reindex") + + verify(mockLogger, never).info(contains("Reindexing")) + } + + "reject invalid source index name" in { + // When + val result = indicesApi.reindex("INVALID", "target-index") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid source index") + result.error.get.operation shouldBe Some("reindex") + } + + "reject invalid target index name" in { + // When + val result = indicesApi.reindex("source-index", "INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid target index") + result.error.get.operation shouldBe Some("reindex") + } + + "fail when source index does not exist" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(false) + + // When + val result = indicesApi.reindex("source-index", "target-index") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Source index 'source-index' does not exist") + result.error.get.statusCode shouldBe Some(404) + } + + "fail when target index does not exist" in { + // Given + var callCount = 0 + val checkingApi = new IndicesApi with 
RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + callCount += 1 + if (callCount == 1) ElasticSuccess(true) // source exists + else ElasticSuccess(false) // target doesn't exist + } + + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? + } + + // When + val result = checkingApi.reindex("source-index", "target-index") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Target index 'target-index' does not exist") + result.error.get.statusCode shouldBe Some(404) + } + + "fail when source existence check fails" in { + // Given + val error = ElasticError("Connection error", statusCode = Some(503)) + indicesApi.executeIndexExistsResult = ElasticFailure(error) + + // When + val result = indicesApi.reindex("source-index", "target-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Connection error" + } + + "fail when target existence check fails" in { + // Given + var callCount = 0 + val checkingApi = new IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + callCount += 1 + if (callCount == 1) ElasticSuccess(true) // source exists + else ElasticFailure(ElasticError("Connection error")) // target check fails + } + + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? 
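+ // Only executeIndexExists is exercised in this scenario; the remaining operations are stubbed with ??? because they must never be reached.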
+ } + + // When + val result = checkingApi.reindex("source-index", "target-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Connection error" + } + } + + "ElasticResult integration" should { + + "work with map transformation" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + + // When + val result = indicesApi.createIndex("my-index").map { success => + if (success) "Created" else "Not created" + } + + // Then + result shouldBe ElasticSuccess("Created") + } + + "work with flatMap composition" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + + // When + val result = indicesApi.createIndex("my-index").flatMap { _ => + indicesApi.indexExists("my-index") + } + + // Then + result shouldBe ElasticSuccess(true) + } + + "work with for-comprehension" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + indicesApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = for { + created <- indicesApi.createIndex("my-index") + opened <- indicesApi.openIndex("my-index") + } yield created && opened + + // Then + result shouldBe ElasticSuccess(true) + } + + "propagate errors through transformations" in { + // Given + val error = ElasticError("Creation failed") + indicesApi.executeCreateIndexResult = ElasticFailure(error) + + // When + val result = indicesApi + .createIndex("my-index") + .map(!_) + .flatMap(v => ElasticSuccess(s"Result: $v")) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Creation failed" + } + } + + "sequential operations" should { + + "handle create, close, open, delete workflow" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + indicesApi.executeCloseIndexResult = ElasticSuccess(true) + indicesApi.executeOpenIndexResult = ElasticSuccess(true) + indicesApi.executeDeleteIndexResult = ElasticSuccess(true) + + // When + val created = indicesApi.createIndex("test-index") + val closed = indicesApi.closeIndex("test-index") + val opened = indicesApi.openIndex("test-index") + val deleted = indicesApi.deleteIndex("test-index") + + // Then + created.isSuccess shouldBe true + closed.isSuccess shouldBe true + opened.isSuccess shouldBe true + deleted.isSuccess shouldBe true + + verify(mockLogger).info(contains("created successfully")) + verify(mockLogger).info(contains("closed successfully")) + verify(mockLogger).info(contains("opened successfully")) + verify(mockLogger).info(contains("deleted successfully")) + } + + "handle multiple index operations" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + + // When + val result1 = indicesApi.createIndex("index1") + val result2 = indicesApi.createIndex("index2") + val result3 = indicesApi.createIndex("index3") + + // Then + result1.isSuccess shouldBe true + result2.isSuccess shouldBe true + result3.isSuccess shouldBe true + + verify(mockLogger, times(3)).info(contains("created successfully")) + } + } + + "error handling" should { + + "handle authentication error" in { + // Given + val error = ElasticError("Authentication failed", statusCode = Some(401)) + indicesApi.executeCreateIndexResult = ElasticFailure(error) + + // When + val result = indicesApi.createIndex("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(401) + } + + "handle authorization error" in { + // Given + val error = ElasticError("Insufficient 
permissions", statusCode = Some(403)) + indicesApi.executeDeleteIndexResult = ElasticFailure(error) + + // When + val result = indicesApi.deleteIndex("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(403) + } + + "handle timeout error" in { + // Given + val error = ElasticError( + "Request timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + indicesApi.executeCloseIndexResult = ElasticFailure(error) + + // When + val result = indicesApi.closeIndex("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + } + + "handle server error" in { + // Given + val error = ElasticError("Internal server error", statusCode = Some(500)) + indicesApi.executeOpenIndexResult = ElasticFailure(error) + + // When + val result = indicesApi.openIndex("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(500) + } + } + + "validation order" should { + + "validate index name before calling execute methods" in { + // Given + var executeCalled = false + val validatingApi = new IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = { + executeCalled = true + ElasticSuccess(true) + } + + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? + } + + // When + validatingApi.createIndex("INVALID") + + // Then + executeCalled shouldBe false + } + + "validate settings after index name validation" in { + // Given + var executeCalled = false + val validatingApi = new IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = { + executeCalled = true + ElasticSuccess(true) + } + + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? 
+ } + + // When + validatingApi.createIndex("valid-index", "invalid json") + + // Then + executeCalled shouldBe false + } + + "validate both indices in reindex before existence checks" in { + // Given + var existsCheckCalled = false + val validatingApi = new IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + existsCheckCalled = true + ElasticSuccess(true) + } + + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? + } + + // When + validatingApi.reindex("INVALID", "target") + + // Then + existsCheckCalled shouldBe false + } + + "check source and target are different before existence checks" in { + // Given + var existsCheckCalled = false + val validatingApi = new IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + existsCheckCalled = true + ElasticSuccess(true) + } + + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? 
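+
+ // NOTE: executeIndexExists acts as a tripwire: reindex("same-index", "same-index")
+ // must be rejected by the source != target check before any existence lookup,
+ // so existsCheckCalled is expected to remain false.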
+ } + + // When + validatingApi.reindex("same-index", "same-index") + + // Then + existsCheckCalled shouldBe false + } + } + + "logging levels" should { + + "use info for successful operations" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + + // When + indicesApi.createIndex("my-index") + + // Then + verify(mockLogger, atLeastOnce).info(any[String]) + verify(mockLogger, never).error(any[String]) + } + + "use error for failed operations" in { + // Given + indicesApi.executeCreateIndexResult = ElasticFailure(ElasticError("Failed")) + + // When + indicesApi.createIndex("my-index") + + // Then + verify(mockLogger).error(any[String]) + } + + "use debug for existence checks" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + + // When + indicesApi.indexExists("my-index") + + // Then + verify(mockLogger, atLeastOnce).debug(any[String]) + verify(mockLogger, never).info(any[String]) + verify(mockLogger, never).error(any[String]) + } + + "use warn when refresh fails but reindex succeeds" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((true, Some(100L))) + indicesApi.executeRefreshResult = ElasticFailure(ElasticError("Refresh failed")) + + // When + indicesApi.reindex("source", "target", refresh = true) + + // Then + verify(mockLogger).warn(contains("Refresh failed but reindex succeeded")) + } + } + + "edge cases" should { + + "handle index name with maximum length" in { + // Given + val maxName = "a" * 255 + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + + // When + val result = indicesApi.createIndex(maxName) + + // Then + result.isSuccess shouldBe true + } + + "handle reindex with zero documents" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((true, Some(0L))) + + // When + val result = indicesApi.reindex("source", "target") + + // Then + result.isSuccess shouldBe true + result.get._2 shouldBe Some(0L) + + verify(mockLogger).info(contains("(0 documents)")) + } + + "handle reindex with large document count" in { + // Given + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((true, Some(1000000L))) + + // When + val result = indicesApi.reindex("source", "target") + + // Then + result.isSuccess shouldBe true + result.get._2 shouldBe Some(1000000L) + + verify(mockLogger).info(contains("(1000000 documents)")) + } + + "handle custom settings with complex JSON" in { + // Given + val complexSettings = + """{ + | "index": { + | "number_of_shards": 5, + | "number_of_replicas": 2, + | "analysis": { + | "analyzer": { + | "custom_analyzer": { + | "type": "custom", + | "tokenizer": "standard" + | } + | } + | } + | } + |}""".stripMargin + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + + // When + val result = indicesApi.createIndex("my-index", complexSettings) + + // Then + result.isSuccess shouldBe true + verify(mockLogger).info(contains(complexSettings)) + } + + "handle settings with whitespace variations" in { + // Given + val settings = """ {"index": {"number_of_shards": 1}} """ + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + + // When + val result = indicesApi.createIndex("my-index", settings) + + // Then + result.isSuccess shouldBe true + } + } + + "complex scenarios" should { + + "handle create-reindex-delete workflow" in { + // Given + indicesApi.executeCreateIndexResult = 
ElasticSuccess(true) + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((true, Some(50L))) + indicesApi.executeDeleteIndexResult = ElasticSuccess(true) + + // When + val created = indicesApi.createIndex("new-index") + val reindexed = indicesApi.reindex("old-index", "new-index") + val deleted = indicesApi.deleteIndex("old-index") + + // Then + created.isSuccess shouldBe true + reindexed.isSuccess shouldBe true + deleted.isSuccess shouldBe true + } + + "handle multiple operations with mixed results" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + val result1 = indicesApi.createIndex("index1") + + indicesApi.executeCreateIndexResult = ElasticFailure(ElasticError("Already exists")) + val result2 = indicesApi.createIndex("index2") + + indicesApi.executeCreateIndexResult = ElasticSuccess(false) + val result3 = indicesApi.createIndex("index3") + + // Then + result1.isSuccess shouldBe true + result2.isFailure shouldBe true + result3.isSuccess shouldBe true + result3.get shouldBe false + } + + "handle reindex with all validation steps" in { + // Given - Valid indices that exist + indicesApi.executeIndexExistsResult = ElasticSuccess(true) + indicesApi.executeReindexResult = ElasticSuccess((true, Some(100L))) + indicesApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = indicesApi.reindex("source-index", "target-index", refresh = true) + + // Then + result.isSuccess shouldBe true + + // Verify all steps were logged + verify(mockLogger).info(contains("Reindexing from")) + verify(mockLogger).info(contains("succeeded")) + verify(mockLogger).debug(contains("refreshed")) + } + } + + "thread safety considerations" should { + + "handle concurrent index operations" in { + // Given + indicesApi.executeCreateIndexResult = ElasticSuccess(true) + + // When - Simulate concurrent calls + val results = (1 to 5).map(i => indicesApi.createIndex(s"index-$i")) + + // Then + results.foreach(_.isSuccess shouldBe true) + verify(mockLogger, times(5)).info(contains("created successfully")) + } + } + } +} diff --git a/core/src/test/scala/app/softnetwork/elastic/client/LogVerificationHelper.scala b/core/src/test/scala/app/softnetwork/elastic/client/LogVerificationHelper.scala new file mode 100644 index 00000000..d4b2623f --- /dev/null +++ b/core/src/test/scala/app/softnetwork/elastic/client/LogVerificationHelper.scala @@ -0,0 +1,32 @@ +package app.softnetwork.elastic.client + +import org.mockito.{ArgumentCaptor, MockitoSugar} +import org.scalatest.matchers.should.Matchers +import org.slf4j.Logger + +trait LogVerificationHelper { _: Matchers with MockitoSugar => + def captureAndVerifyLog( + logger: Logger, + level: String, + expectedMessages: String* + )(implicit pos: org.scalactic.source.Position): Unit = { + import scala.jdk.CollectionConverters._ + + val captor: ArgumentCaptor[String] = ArgumentCaptor.forClass(classOf[String]) + + level match { + case "info" => verify(logger, atLeastOnce).info(captor.capture()) + case "error" => verify(logger, atLeastOnce).error(captor.capture()) + case "warn" => verify(logger, atLeastOnce).warn(captor.capture()) + case "debug" => verify(logger, atLeastOnce).debug(captor.capture()) + } + + val allMessages = captor.getAllValues.asScala.mkString("\n") + + expectedMessages.foreach { expected => + withClue(s"Expected message not found: '$expected'\nAll messages:\n$allMessages\n") { + allMessages should include(expected) + } + } + } +} diff --git 
a/core/src/test/scala/app/softnetwork/elastic/client/MappingApiSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/MappingApiSpec.scala new file mode 100644 index 00000000..e2ce89e7 --- /dev/null +++ b/core/src/test/scala/app/softnetwork/elastic/client/MappingApiSpec.scala @@ -0,0 +1,2438 @@ +package app.softnetwork.elastic.client + +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.BeforeAndAfterEach +import org.mockito.{ArgumentMatchersSugar, MockitoSugar} +import org.slf4j.Logger +import app.softnetwork.elastic.client.result._ + +/** Unit tests for MappingApi + */ +class MappingApiSpec + extends AnyWordSpec + with LogVerificationHelper + with Matchers + with BeforeAndAfterEach + with MockitoSugar + with ArgumentMatchersSugar { + + // Mock logger + val mockLogger: Logger = mock[Logger] + + // Valid test data + val validMapping: String = """{"properties":{"name":{"type":"text"}}}""" + val validSettings: String = """{"my-index":{"settings":{"index":{"number_of_shards":"1"}}}}""" + val updatedMapping: String = + """{"properties":{"name":{"type":"text"},"age":{"type":"integer"}}}""" + + // Concrete implementation for testing + class TestMappingApi extends MappingApi with SettingsApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + // Control variables + var executeSetMappingResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeGetMappingResult: ElasticResult[String] = ElasticSuccess(validMapping) + var executeIndexExistsResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeCreateIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeDeleteIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeReindexFunction + : (String, String, Boolean) => ElasticResult[(Boolean, Option[Long])] = + (_, _, _) => ElasticSuccess((true, Some(100L))) + var executeLoadSettingsResult: ElasticResult[String] = ElasticSuccess(validSettings) + var executeOpenIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeCloseIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + + override private[client] def executeSetMapping( + index: String, + mapping: String + ): ElasticResult[Boolean] = { + executeSetMappingResult + } + + override private[client] def executeGetMapping(index: String): ElasticResult[String] = { + executeGetMappingResult + } + + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + executeIndexExistsResult + } + + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = { + executeCreateIndexResult + } + + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = { + executeDeleteIndexResult + } + + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = { + executeReindexFunction(sourceIndex, targetIndex, refresh) + } + + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = { + executeLoadSettingsResult + } + + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = { + executeOpenIndexResult + } + + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + executeCloseIndexResult + } + + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = + 
ElasticSuccess(true) + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = ElasticSuccess(true) + } + + var mappingApi: TestMappingApi = _ + + override def beforeEach(): Unit = { + super.beforeEach() + mappingApi = new TestMappingApi() + reset(mockLogger) + } + + "MappingApi" should { + + "setMapping" should { + + "successfully set mapping on valid index" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", validMapping) + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + + verify(mockLogger).debug(s"Setting mapping for index 'my-index': $validMapping") + verify(mockLogger).info("✅ Mapping for index 'my-index' updated successfully") + } + + "reject invalid index name" in { + // When + val result = mappingApi.setMapping("INVALID", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.message should include("lowercase") + result.error.get.statusCode shouldBe Some(400) + result.error.get.index shouldBe Some("INVALID") + result.error.get.operation shouldBe Some("setMapping") + + verify(mockLogger, never).debug(any[String]) + } + + "reject empty index name" in { + // When + val result = mappingApi.setMapping("", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.message should include("cannot be empty") + } + + "reject null index name" in { + // When + val result = mappingApi.setMapping(null, validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + } + + "reject invalid JSON mapping" in { + // When + val result = mappingApi.setMapping("my-index", "invalid json {") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid mapping") + result.error.get.statusCode shouldBe Some(400) + result.error.get.index shouldBe Some("my-index") + result.error.get.operation shouldBe Some("setMapping") + + verify(mockLogger, never).debug(any[String]) + } + + "reject empty mapping" in { + // When + val result = mappingApi.setMapping("my-index", "") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid mapping") + } + + "reject null mapping" in { + // When + val result = mappingApi.setMapping("my-index", null) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid mapping") + } + + "validate index before mapping" in { + // When - Invalid index should fail first + val result = mappingApi.setMapping("INVALID", "invalid json") + + // Then + result.error.get.message should include("Invalid index") + result.error.get.message should not include "Invalid mapping" + } + + "log info when mapping not updated" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(false) + + // When + val result = mappingApi.setMapping("my-index", validMapping) + + // Then + result.isSuccess shouldBe true + result.get shouldBe false + + verify(mockLogger).debug(contains("Setting mapping")) + verify(mockLogger).info("✅ Mapping for index 'my-index' not updated") + } + + "fail when executeSetMapping fails" in { + // Given + val error = ElasticError("Mapping conflict", statusCode = Some(400)) + mappingApi.executeSetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.setMapping("my-index", 
validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Mapping conflict" + + verify(mockLogger).debug(contains("Setting mapping")) + verify(mockLogger).error( + "❌ Failed to update mapping for index 'my-index': Mapping conflict" + ) + } + + "handle complex mapping with nested fields" in { + // Given + val complexMapping = + """{ + | "properties": { + | "user": { + | "type": "nested", + | "properties": { + | "name": {"type": "text"}, + | "age": {"type": "integer"} + | } + | } + | } + |}""".stripMargin + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", complexMapping) + + // Then + result.isSuccess shouldBe true + } + + "handle mapping with analyzers" in { + // Given + val mappingWithAnalyzer = + """{ + | "properties": { + | "title": { + | "type": "text", + | "analyzer": "standard" + | } + | } + |}""".stripMargin + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", mappingWithAnalyzer) + + // Then + result.isSuccess shouldBe true + } + + "handle mapping with multiple field types" in { + // Given + val multiFieldMapping = + """{ + | "properties": { + | "name": {"type": "text"}, + | "age": {"type": "integer"}, + | "created": {"type": "date"}, + | "active": {"type": "boolean"} + | } + |}""".stripMargin + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", multiFieldMapping) + + // Then + result.isSuccess shouldBe true + } + + "reject mapping with special characters in index name" in { + // When + val result = mappingApi.setMapping("my*index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + } + + "handle very large mapping" in { + // Given + val fields = (1 to 100).map(i => s""""field$i":{"type":"text"}""").mkString(",") + val largeMapping = s"""{"properties":{$fields}}""" + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", largeMapping) + + // Then + result.isSuccess shouldBe true + } + } + + "getMapping" should { + + "successfully get mapping for valid index" in { + // Given + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + val result = mappingApi.getMapping("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe validMapping + + verify(mockLogger).debug("Getting mapping for index 'my-index'") + } + + "reject invalid index name" in { + // When + val result = mappingApi.getMapping("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.statusCode shouldBe Some(400) + result.error.get.index shouldBe Some("INVALID") + result.error.get.operation shouldBe Some("getMapping") + + verify(mockLogger, never).debug(any[String]) + } + + "reject empty index name" in { + // When + val result = mappingApi.getMapping("") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + } + + "reject null index name" in { + // When + val result = mappingApi.getMapping(null) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + } + + "fail when executeGetMapping fails" in { + // Given + val error = ElasticError("Index not found", statusCode = Some(404)) + mappingApi.executeGetMappingResult = 
ElasticFailure(error) + + // When + val result = mappingApi.getMapping("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Index not found" + result.error.get.statusCode shouldBe Some(404) + + verify(mockLogger).debug("Getting mapping for index 'my-index'") + } + + "return complex mapping" in { + // Given + val complexMapping = + """{ + | "properties": { + | "user": { + | "type": "nested", + | "properties": { + | "name": {"type": "text"}, + | "email": {"type": "keyword"} + | } + | } + | } + |}""".stripMargin + mappingApi.executeGetMappingResult = ElasticSuccess(complexMapping) + + // When + val result = mappingApi.getMapping("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe complexMapping + } + + "handle empty mapping response" in { + // Given + mappingApi.executeGetMappingResult = ElasticSuccess("{}") + + // When + val result = mappingApi.getMapping("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe "{}" + } + + "handle authentication error" in { + // Given + val error = ElasticError("Authentication failed", statusCode = Some(401)) + mappingApi.executeGetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.getMapping("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(401) + } + } + + "getMappingProperties" should { + + "delegate to getMapping" in { + // Given + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + val result = mappingApi.getMappingProperties("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe validMapping + + verify(mockLogger).debug("Getting mapping for index 'my-index'") + } + + "reject invalid index name" in { + // When + val result = mappingApi.getMappingProperties("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + } + + "fail when getMapping fails" in { + // Given + val error = ElasticError("Failed", statusCode = Some(500)) + mappingApi.executeGetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.getMappingProperties("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Failed" + } + } + + "shouldUpdateMapping" should { + + "return true when mapping is different" in { + // Given + val currentMapping = """{"properties":{"name":{"type":"text"}}}""" + val newMapping = """{"properties":{"name":{"type":"text"},"age":{"type":"integer"}}}""" + mappingApi.executeGetMappingResult = ElasticSuccess(currentMapping) + + // When + val result = mappingApi.shouldUpdateMapping("my-index", newMapping) + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + } + + "return false when mapping is identical" in { + // Given + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + val result = mappingApi.shouldUpdateMapping("my-index", validMapping) + + // Then + result.isSuccess shouldBe true + result.get shouldBe false + } + + "fail when getMapping fails" in { + // Given + val error = ElasticError("Index not found", statusCode = Some(404)) + mappingApi.executeGetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.shouldUpdateMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Index not found" + } + + "handle complex mapping comparison" in { + // Given + val mapping1 = + 
"""{"properties":{"user":{"type":"nested","properties":{"name":{"type":"text"}}}}}""" + val mapping2 = + """{"properties":{"user":{"type":"nested","properties":{"name":{"type":"keyword"}}}}}""" + mappingApi.executeGetMappingResult = ElasticSuccess(mapping1) + + // When + val result = mappingApi.shouldUpdateMapping("my-index", mapping2) + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + } + } + + "updateMapping" should { + + "create index with mapping when index does not exist" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(false) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", validMapping, validSettings) + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + + verify(mockLogger).info("Creating new index 'my-index' with mapping") + verify(mockLogger, atLeastOnce).info("✅ Index 'my-index' created successfully") + verify(mockLogger, atLeastOnce).info("✅ Mapping for index 'my-index' set successfully") + } + + "do nothing when mapping is up to date" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + val result = mappingApi.updateMapping("my-index", validMapping) + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + + verify(mockLogger).info("✅ Mapping for index 'my-index' is already up to date") + verify(mockLogger, never).info(contains("migration")) + } + + "migrate mapping when index exists and mapping is different" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping, validSettings) + + // Then + result.isSuccess shouldBe true + result.get shouldBe true + + verify(mockLogger).info(contains("Mapping for index 'my-index' needs update")) + verify(mockLogger, atLeastOnce).info(contains("Starting migration")) + verify(mockLogger).info(contains("✅ Backed up original mapping")) + verify(mockLogger, atLeastOnce).info(contains("✅ Migration completed successfully")) + } + + "fail when index creation fails" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(false) + val error = ElasticError("Creation failed", statusCode = Some(400)) + mappingApi.executeCreateIndexResult = ElasticFailure(error) + + // When + val result = mappingApi.updateMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Creation failed") + } + + "fail when setMapping fails during creation" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(false) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + val error = ElasticError("Mapping invalid", statusCode = Some(400)) + mappingApi.executeSetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.updateMapping("my-index", validMapping) + + // Then + result.isFailure 
shouldBe true + result.error.get.message should include("Mapping invalid") + } + + "rollback when migration fails" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // First reindex fails, second succeeds + val error = ElasticError("Reindex failed", statusCode = Some(500)) + var reindexCallCount = 0 + mappingApi.executeReindexFunction = { (_, _, _) => + reindexCallCount += 1 + if (reindexCallCount == 2) { + ElasticSuccess((true, Some(100L))) + } else { + ElasticFailure(error) + } + } + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + + verify(mockLogger).info(contains("✅ Backed up original mapping")) + verify(mockLogger).error(contains("❌ Migration failed")) + verify(mockLogger).info(contains("Attempting rollback")) + verify(mockLogger).info(contains("✅ Rollback completed successfully")) + } + + "fail when backup fails" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + val error = ElasticError("Cannot get mapping", statusCode = Some(500)) + mappingApi.executeGetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Cannot get mapping") + } + + "use default settings when not provided" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(false) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", validMapping) + + // Then + result.isSuccess shouldBe true + } + } + + "performMigration" should { + + "successfully migrate index with all steps" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When - trigger the migration with all stubs in place + val migrationResult = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + migrationResult.isSuccess shouldBe true + verify(mockLogger, atLeastOnce).info(contains("Starting migration")) + verify(mockLogger, atLeastOnce).info(contains("✅ Migration completed")) + } + + "fail when temp index creation fails" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + + val error = ElasticError("Cannot create temp index", statusCode = Some(400)) + mappingApi.executeCreateIndexResult = ElasticFailure(error) +
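+ // The stubbed failure above aborts the migration at the temp-index creation step;
+ // the delete/open stubs below only back the rollback path, which the test still
+ // expects to be attempted.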
mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + verify(mockLogger).error(contains("❌ Migration failed")) + verify(mockLogger).info(contains("Attempting rollback")) + } + + "fail when first reindex fails" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + val error = ElasticError("Reindex to temp failed", statusCode = Some(500)) + mappingApi.executeReindexFunction = (_, _, _) => ElasticFailure(error) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + verify(mockLogger).error(contains("❌ Migration failed")) + } + + "fail when delete original fails" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + + val error = ElasticError("Cannot delete index", statusCode = Some(403)) + mappingApi.executeDeleteIndexResult = ElasticFailure(error) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + } + } + + "rollbackMigration" should { + + "successfully rollback after migration failure" in { + // Given - Setup for migration that will fail + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // Fail on first reindex + var reindexCallCount = 0 + mappingApi.executeReindexFunction = { (source, target, refresh) => + reindexCallCount += 1 + if (reindexCallCount == 2) { + ElasticSuccess((true, Some(100L))) + } else { + ElasticFailure(ElasticError("Reindex failed")) + } + } + + // Rollback operations succeed + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + verify(mockLogger).info(contains("Attempting rollback")) + verify(mockLogger).info(contains("✅ Rollback completed successfully")) + } + + "handle rollback when temp index exists" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + 
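+ // Reindex succeeds here, so the migration only falls over at the delete-original
+ // step: the stubbed delete failure below is what forces the rollback attempt.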
mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + + // Fail on delete original (the stub var holds a single pre-computed result: a counting + // block assigned here would run only once, at assignment time, so the failure applies + // to every delete call, which is enough to force the rollback attempt) + mappingApi.executeDeleteIndexResult = ElasticFailure(ElasticError("Delete failed")) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + verify(mockLogger).info(contains("Attempting rollback")) + } + + "log error when rollback fails" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // Fail migration + mappingApi.executeReindexFunction = + (_, _, _) => ElasticFailure(ElasticError("Reindex failed")) + + // Fail rollback + mappingApi.executeDeleteIndexResult = ElasticFailure(ElasticError("Delete failed")) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + verify(mockLogger).info(contains("Attempting rollback")) + verify(mockLogger).error(contains("❌ Rollback failed")) + } + } + + "workflow scenarios" should { + + "successfully create new index with mapping" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(false) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("products", validMapping, validSettings) + + // Then + result.isSuccess shouldBe true + verify(mockLogger).info("Creating new index 'products' with mapping") + verify(mockLogger, atLeastOnce).info("✅ Index 'products' created successfully") + verify(mockLogger).info("✅ Mapping for index 'products' set successfully") + } + + "successfully update existing index with new mapping" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping, validSettings) + + // Then + result.isSuccess shouldBe true + verify(mockLogger).info(contains("needs update")) + verify(mockLogger, atLeastOnce).info(contains("Starting migration")) + verify(mockLogger, atLeastOnce).info(contains("✅ Migration completed successfully")) + } + + "skip update when mapping is identical" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + val result = mappingApi.updateMapping("my-index", validMapping) + + // Then + result.isSuccess shouldBe true + verify(mockLogger).info("✅ Mapping for index 'my-index' is already up to date") + verify(mockLogger, never).info(contains("migration")) + } + + "handle full migration lifecycle" in { + // Given +
mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isSuccess shouldBe true + + // Verify all migration steps logged + verify(mockLogger).info(contains("✅ Backed up original mapping")) + verify(mockLogger, atLeastOnce).info(contains("Starting migration")) + verify(mockLogger, atLeastOnce).info(contains("✅ Migration completed")) + } + + "chain setMapping and getMapping" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + val setResult = mappingApi.setMapping("my-index", validMapping) + val getResult = mappingApi.getMapping("my-index") + + // Then + setResult.isSuccess shouldBe true + getResult.isSuccess shouldBe true + getResult.get shouldBe validMapping + } + + "verify mapping update detection" in { + // Given + val oldMapping = """{"properties":{"name":{"type":"text"}}}""" + val newMapping = """{"properties":{"name":{"type":"text"},"age":{"type":"integer"}}}""" + mappingApi.executeGetMappingResult = ElasticSuccess(oldMapping) + + // When + val shouldUpdate = mappingApi.shouldUpdateMapping("my-index", newMapping) + + // Then + shouldUpdate.isSuccess shouldBe true + shouldUpdate.get shouldBe true + } + + "handle complete rollback scenario" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // First reindex fails, second succeeds + var reindexCallCount = 0 + mappingApi.executeReindexFunction = { (_, _, _) => + reindexCallCount += 1 + if (reindexCallCount == 2) { + ElasticSuccess((true, Some(100L))) + } else { + ElasticFailure(ElasticError("First reindex failed")) + } + } + + // Rollback succeeds + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + + captureAndVerifyLog(mockLogger, "error", "❌ Migration failed") + captureAndVerifyLog(mockLogger, "info", "Attempting rollback", "✅ Rollback index completed") + } + } + + "edge cases" should { + + "handle index name at maximum length" in { + // Given + val maxIndex = "a" * 255 + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping(maxIndex, validMapping) + + // Then + result.isSuccess shouldBe true + } + + "reject index name exceeding maximum length" in { + // Given + val tooLong = "a" * 256 + + // When + val result = mappingApi.setMapping(tooLong, validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + } + + "handle very large mapping JSON" in { + // Given + val fields =
(1 to 1000).map(i => s""""field$i":{"type":"text"}""").mkString(",") + val largeMapping = s"""{"properties":{$fields}}""" + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", largeMapping) + + // Then + result.isSuccess shouldBe true + } + + "handle mapping with unicode characters" in { + // Given + val unicodeMapping = """{"properties":{"名前":{"type":"text"},"café":{"type":"keyword"}}}""" + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", unicodeMapping) + + // Then + result.isSuccess shouldBe true + } + + "handle mapping with special JSON characters" in { + // Given + val specialMapping = """{"properties":{"field\"name":{"type":"text"}}}""" + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", specialMapping) + + // Then + result.isSuccess shouldBe true + } + + "handle empty mapping properties" in { + // Given + val emptyMapping = """{"properties":{}}""" + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", emptyMapping) + + // Then + result.isSuccess shouldBe true + } + + "handle mapping with deeply nested objects" in { + // Given + val deepMapping = + """{ + | "properties": { + | "level1": { + | "properties": { + | "level2": { + | "properties": { + | "level3": { + | "properties": { + | "level4": {"type": "text"} + | } + | } + | } + | } + | } + | } + | } + |}""".stripMargin + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.setMapping("my-index", deepMapping) + + // Then + result.isSuccess shouldBe true + } + + "handle concurrent setMapping calls" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val results = (1 to 5).map(i => mappingApi.setMapping(s"index-$i", validMapping)) + + // Then + results.foreach(_.isSuccess shouldBe true) + } + + "handle consecutive updateMapping calls" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(false) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result1 = mappingApi.updateMapping("index1", validMapping) + val result2 = mappingApi.updateMapping("index2", validMapping) + val result3 = mappingApi.updateMapping("index3", validMapping) + + // Then + result1.isSuccess shouldBe true + result2.isSuccess shouldBe true + result3.isSuccess shouldBe true + } + + "handle null response from executeGetMapping" in { + // Given + mappingApi.executeGetMappingResult = ElasticSuccess(null) + + // When + val result = mappingApi.getMapping("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe null + } + + "handle whitespace-only mapping" in { + // Given + val whitespaceMapping = " " + + // When + val result = mappingApi.setMapping("my-index", whitespaceMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid mapping") + } + + "handle mapping with comments (invalid JSON)" in { + // Given + val mappingWithComments = """{"properties":{"name":{"type":"text"}}} // comment""" + + // When + val result = mappingApi.setMapping("my-index", mappingWithComments) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid mapping") + } + } + + "error handling" should { + + "preserve error context in setMapping" in 
{ + // Given + val error = ElasticError( + message = "Mapping conflict", + cause = Some(new RuntimeException("Root cause")), + statusCode = Some(400), + index = Some("test-index") + ) + mappingApi.executeSetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.setMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Mapping conflict" + result.error.get.cause shouldBe defined + result.error.get.statusCode shouldBe Some(400) + } + + "handle authentication error" in { + // Given + val error = ElasticError("Authentication required", statusCode = Some(401)) + mappingApi.executeSetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.setMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(401) + verify(mockLogger).error(contains("❌ Failed to update mapping")) + } + + "handle authorization error" in { + // Given + val error = ElasticError("Insufficient permissions", statusCode = Some(403)) + mappingApi.executeSetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.setMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(403) + } + + "handle timeout error" in { + // Given + val error = ElasticError( + "Request timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + mappingApi.executeGetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.getMapping("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + result.error.get.cause.get shouldBe a[java.net.SocketTimeoutException] + } + + "handle server error" in { + // Given + val error = ElasticError("Internal server error", statusCode = Some(500)) + mappingApi.executeSetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.setMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(500) + } + + "handle conflict error" in { + // Given + val error = ElasticError("Version conflict", statusCode = Some(409)) + mappingApi.executeSetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.setMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(409) + } + + "handle network error" in { + // Given + val error = ElasticError( + "Connection refused", + cause = Some(new java.net.ConnectException()), + statusCode = Some(503) + ) + mappingApi.executeGetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.getMapping("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + } + + "propagate errors through flatMap chain" in { + // Given + mappingApi.executeIndexExistsResult = ElasticFailure(ElasticError("Connection failed")) + + // When + val result = mappingApi.updateMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Connection failed" + } + + "handle error in shouldUpdateMapping" in { + // Given + val error = ElasticError("Cannot retrieve mapping", statusCode = Some(500)) + mappingApi.executeGetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.shouldUpdateMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe 
"Cannot retrieve mapping" + } + + "handle partial failure in createIndexWithMapping" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(false) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + + // setMapping fails + val error = ElasticError("Invalid mapping structure", statusCode = Some(400)) + mappingApi.executeSetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi.updateMapping("my-index", validMapping) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid mapping structure") + } + + "handle all validation errors" in { + // Invalid index + val result1 = mappingApi.setMapping("INVALID", validMapping) + result1.isFailure shouldBe true + result1.error.get.operation shouldBe Some("setMapping") + + // Invalid mapping + val result2 = mappingApi.setMapping("my-index", "invalid") + result2.isFailure shouldBe true + result2.error.get.operation shouldBe Some("setMapping") + + // Empty index + val result3 = mappingApi.getMapping("") + result3.isFailure shouldBe true + result3.error.get.operation shouldBe Some("getMapping") + + // Null index + val result4 = mappingApi.getMapping(null) + result4.isFailure shouldBe true + result4.error.get.operation shouldBe Some("getMapping") + } + } + + "logging behavior" should { + + "log debug message for setMapping" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + mappingApi.setMapping("my-index", validMapping) + + // Then + verify(mockLogger).debug(s"Setting mapping for index 'my-index': $validMapping") + } + + "log info with emoji for successful setMapping" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + mappingApi.setMapping("my-index", validMapping) + + // Then + verify(mockLogger).info("✅ Mapping for index 'my-index' updated successfully") + } + + "log info when mapping not updated" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(false) + + // When + mappingApi.setMapping("my-index", validMapping) + + // Then + verify(mockLogger).info("✅ Mapping for index 'my-index' not updated") + } + + "log error with emoji for failed setMapping" in { + // Given + mappingApi.executeSetMappingResult = ElasticFailure(ElasticError("Failed")) + + // When + mappingApi.setMapping("my-index", validMapping) + + // Then + verify(mockLogger).error("❌ Failed to update mapping for index 'my-index': Failed") + } + + "log debug message for getMapping" in { + // Given + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + mappingApi.getMapping("my-index") + + // Then + verify(mockLogger).debug("Getting mapping for index 'my-index'") + } + + "not log for validation failures" in { + // When + mappingApi.setMapping("INVALID", validMapping) + + // Then + verify(mockLogger, never).debug(any[String]) + verify(mockLogger, never).info(any[String]) + verify(mockLogger, never).error(any[String]) + } + + "log migration steps" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When 
+      mappingApi.updateMapping("my-index", updatedMapping)
+
+      // Then
+      verify(mockLogger).info(contains("needs update"))
+      verify(mockLogger, atLeastOnce).info(contains("Starting migration"))
+      verify(mockLogger).info(contains("✅ Backed up original mapping"))
+      verify(mockLogger).info(contains("✅ Migration completed successfully"))
+    }
+
+    "log rollback steps" in {
+      // Given
+      mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+      mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+      mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings)
+      mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+      mappingApi.executeSetMappingResult = ElasticSuccess(true)
+      // First reindex fails, second succeeds
+      var reindexCallCount = 0
+      mappingApi.executeReindexFunction = { (source, target, refresh) =>
+        reindexCallCount += 1
+        if (reindexCallCount == 2) {
+          ElasticSuccess((true, Some(100L)))
+        } else {
+          ElasticFailure(ElasticError("First reindex failed"))
+        }
+      }
+      mappingApi.executeDeleteIndexResult = ElasticSuccess(true)
+      mappingApi.executeOpenIndexResult = ElasticSuccess(true)
+
+      // When
+      mappingApi.updateMapping("my-index", updatedMapping)
+
+      // Then
+      captureAndVerifyLog(mockLogger, "error", "❌ Migration failed")
+      captureAndVerifyLog(
+        mockLogger,
+        "info",
+        "Attempting rollback",
+        "✅ Rollback completed successfully"
+      )
+    }
+
+    "log rollback failure" in {
+      // Given
+      mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+      mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+      mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings)
+      mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+      mappingApi.executeSetMappingResult = ElasticSuccess(true)
+      mappingApi.executeReindexFunction =
+        (_, _, _) => ElasticFailure(ElasticError("Migration failed"))
+      mappingApi.executeDeleteIndexResult = ElasticFailure(ElasticError("Rollback failed"))
+
+      // When
+      mappingApi.updateMapping("my-index", updatedMapping)
+
+      // Then
+      verify(mockLogger).error(contains("❌ Migration failed"))
+      verify(mockLogger).info(contains("Attempting rollback"))
+      verify(mockLogger).error(contains("❌ Rollback failed"))
+    }
+
+    "log when mapping is up to date" in {
+      // Given
+      mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+      mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+
+      // When
+      mappingApi.updateMapping("my-index", validMapping)
+
+      // Then
+      verify(mockLogger).info("✅ Mapping for index 'my-index' is already up to date")
+    }
+
+    "log index creation with mapping" in {
+      // Given
+      mappingApi.executeIndexExistsResult = ElasticSuccess(false)
+      mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+      mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+      // When
+      mappingApi.updateMapping("my-index", validMapping)
+
+      // Then
+      verify(mockLogger).info("Creating new index 'my-index' with mapping")
+      verify(mockLogger, atLeastOnce).info("✅ Index 'my-index' created successfully")
+      verify(mockLogger).info("✅ Mapping for index 'my-index' set successfully")
+    }
+
+    /*"log backup failure" in {
+      // Given
+      mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+      mappingApi.executeGetMappingResult = ElasticFailure(ElasticError("Cannot get mapping"))
+
+      // When
+      mappingApi.updateMapping("my-index", updatedMapping)
+
+      // Then
+      verify(mockLogger).error(contains("❌ Failed to backup original state"))
+    }*/
+
+    "log all emojis correctly" in {
+      // Given
+      mappingApi.executeIndexExistsResult = ElasticSuccess(false)
+ mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + mappingApi.updateMapping("my-index", validMapping) + + // Then + verify(mockLogger, atLeast(1)).info(contains("✅")) + } + } + + "validation order" should { + + "validate index before mapping in setMapping" in { + // When + val result = mappingApi.setMapping("INVALID", "invalid json") + + // Then + result.error.get.message should include("Invalid index") + result.error.get.message should not include "Invalid mapping" + } + + "validate mapping after index in setMapping" in { + // When + val result = mappingApi.setMapping("my-index", "invalid json") + + // Then + result.error.get.message should include("Invalid mapping") + } + + "validate index in getMapping" in { + // When + val result = mappingApi.getMapping("INVALID") + + // Then + result.error.get.message should include("Invalid index") + } + + "not call execute methods when validation fails" in { + // Given + var executeCalled = false + val validatingApi = new TestMappingApi { + override private[client] def executeSetMapping( + index: String, + mapping: String + ): ElasticResult[Boolean] = { + executeCalled = true + ElasticSuccess(true) + } + } + + // When + validatingApi.setMapping("INVALID", validMapping) + + // Then + executeCalled shouldBe false + } + } + + "ElasticResult integration" should { + + "work with map transformation" in { + // Given + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + val result = mappingApi.getMapping("my-index").map(_.length) + + // Then + result.isSuccess shouldBe true + result.get shouldBe validMapping.length + } + + "work with flatMap composition" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + val result = mappingApi.setMapping("my-index", validMapping).flatMap { _ => + mappingApi.getMapping("my-index") + } + + // Then + result.isSuccess shouldBe true + result.get shouldBe validMapping + } + + "work with for-comprehension" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + + // When + val result = for { + _ <- mappingApi.setMapping("my-index", validMapping) + mapping <- mappingApi.getMapping("my-index") + } yield mapping + + // Then + result shouldBe ElasticSuccess(validMapping) + } + + "propagate errors through transformations" in { + // Given + val error = ElasticError("Failed") + mappingApi.executeGetMappingResult = ElasticFailure(error) + + // When + val result = mappingApi + .getMapping("my-index") + .map(_.length) + .flatMap(len => ElasticSuccess(len * 2)) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Failed" + } + + "handle chained operations with mixed results" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticFailure(ElasticError("Get failed")) + + // When + val result = for { + _ <- mappingApi.setMapping("my-index", validMapping) + mapping <- mappingApi.getMapping("my-index") + } yield mapping + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Get failed" + } + + "use filter in for-comprehension" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(false) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) 
+ + // When + val result = mappingApi.updateMapping("my-index", validMapping) + + // Then + result.isSuccess shouldBe true + } + } + + "real-world scenarios" should { + + "add new field to existing mapping" in { + // Given + val oldMapping = """{"properties":{"name":{"type":"text"}}}""" + val newMapping = """{"properties":{"name":{"type":"text"},"email":{"type":"keyword"}}}""" + + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(oldMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", newMapping) + + // Then + result.isSuccess shouldBe true + verify(mockLogger).info(contains("needs update")) + verify(mockLogger).info(contains("Migration completed successfully")) + } + + "change field type in mapping" in { + // Given + val oldMapping = """{"properties":{"age":{"type":"text"}}}""" + val newMapping = """{"properties":{"age":{"type":"integer"}}}""" + + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(oldMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", newMapping) + + // Then + result.isSuccess shouldBe true + } + + "handle zero-downtime mapping update" in { + // Given - Simulates production scenario + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(10000L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isSuccess shouldBe true + verify(mockLogger, atLeastOnce).info(contains("Starting migration")) + verify(mockLogger, atLeastOnce).info(contains("Migration completed")) + } + + "recover from failed migration" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + // First reindex fails, second succeeds + var reindexCallCount = 0 + mappingApi.executeReindexFunction = { (source, target, refresh) => + reindexCallCount += 1 + if (reindexCallCount == 2) { + ElasticSuccess((true, Some(100L))) + } else { + ElasticFailure(ElasticError("Reindex timeout")) + } + } + 
mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isFailure shouldBe true + verify(mockLogger).error(contains("❌ Migration failed")) + verify(mockLogger).info(contains("Attempting rollback")) + verify(mockLogger).info(contains("✅ Rollback completed successfully")) + } + + "bootstrap new index with complex mapping" in { + // Given + val complexMapping = + """{ + | "properties": { + | "user": { + | "type": "nested", + | "properties": { + | "name": {"type": "text"}, + | "email": {"type": "keyword"} + | } + | }, + | "tags": {"type": "keyword"}, + | "created": {"type": "date"} + | } + |}""".stripMargin + + mappingApi.executeIndexExistsResult = ElasticSuccess(false) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", complexMapping, validSettings) + + // Then + result.isSuccess shouldBe true + verify(mockLogger).info("Creating new index 'my-index' with mapping") + verify(mockLogger, atLeastOnce).info("✅ Index 'my-index' created successfully") + verify(mockLogger).info("✅ Mapping for index 'my-index' set successfully") + } + + "handle multi-tenant index mapping update" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(5000L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isSuccess shouldBe true + } + + "verify mapping before and after update" in { + // Given + val oldMapping = """{"properties":{"name":{"type":"text"}}}""" + val newMapping = """{"properties":{"name":{"type":"text"},"age":{"type":"integer"}}}""" + + mappingApi.executeGetMappingResult = ElasticSuccess(oldMapping) + + // When - Check if update needed + val shouldUpdate = mappingApi.shouldUpdateMapping("users", newMapping) + + // Then + shouldUpdate.isSuccess shouldBe true + shouldUpdate.get shouldBe true + } + + "handle incremental mapping evolution" in { + // Given - Step 1: Add email field + val mapping1 = """{"properties":{"name":{"type":"text"}}}""" + val mapping2 = """{"properties":{"name":{"type":"text"},"email":{"type":"keyword"}}}""" + val mapping3 = + """{"properties":{"name":{"type":"text"},"email":{"type":"keyword"},"age":{"type":"integer"}}}""" + + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(mapping1) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When - First update + val result1 = mappingApi.updateMapping("my-index", mapping2) + + // Then + result1.isSuccess shouldBe true + 
+ // When - Second update + mappingApi.executeGetMappingResult = ElasticSuccess(mapping2) + val result2 = mappingApi.updateMapping("my-index", mapping3) + + // Then + result2.isSuccess shouldBe true + } + } + + "performance considerations" should { + + "handle rapid consecutive setMapping calls" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When + val start = System.currentTimeMillis() + (1 to 100).foreach(i => mappingApi.setMapping(s"index-$i", validMapping)) + val duration = System.currentTimeMillis() - start + + // Then - Should complete reasonably fast + duration should be < 5000L // 5 seconds + } + + "not accumulate memory with repeated calls" in { + // Given + mappingApi.executeSetMappingResult = ElasticSuccess(true) + + // When - Multiple iterations + (1 to 10).foreach { iteration => + (1 to 100).foreach(i => mappingApi.setMapping(s"index-$i", validMapping)) + } + + // Then - Should not throw OutOfMemoryError + succeed + } + + "handle large reindex operations" in { + // Given + mappingApi.executeIndexExistsResult = ElasticSuccess(true) + mappingApi.executeGetMappingResult = ElasticSuccess(validMapping) + mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings) + mappingApi.executeCreateIndexResult = ElasticSuccess(true) + mappingApi.executeSetMappingResult = ElasticSuccess(true) + mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(1000000L))) + mappingApi.executeDeleteIndexResult = ElasticSuccess(true) + mappingApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = mappingApi.updateMapping("my-index", updatedMapping) + + // Then + result.isSuccess shouldBe true + } + } + + "error messages" should { + + "be descriptive for validation errors" in { + val result = mappingApi.setMapping("INVALID", validMapping) + result.error.get.message should include("Invalid index") + result.error.get.message should include("lowercase") + } + + "include operation context" in { + val result1 = mappingApi.setMapping("INVALID", validMapping) + result1.error.get.operation shouldBe Some("setMapping") + + val result2 = mappingApi.getMapping("INVALID") + result2.error.get.operation shouldBe Some("getMapping") + } + + "include status codes" in { + val result = mappingApi.setMapping("INVALID", validMapping) + result.error.get.statusCode shouldBe Some(400) + } + + "be clear about which parameter is invalid" in { + val result1 = mappingApi.setMapping("INVALID", validMapping) + result1.error.get.message should include("Invalid index") + + val result2 = mappingApi.setMapping("my-index", "invalid json") + result2.error.get.message should include("Invalid mapping") + } + + "preserve original error messages from execute methods" in { + // Given + val originalError = ElasticError("Custom mapping error", statusCode = Some(500)) + mappingApi.executeSetMappingResult = ElasticFailure(originalError) + + // When + val result = mappingApi.setMapping("my-index", validMapping) + + // Then + result.error.get.message shouldBe "Custom mapping error" + result.error.get.statusCode shouldBe Some(500) + } + + "include index name in error context" in { + val result = mappingApi.setMapping("INVALID", validMapping) + result.error.get.index shouldBe Some("INVALID") + } + + "provide helpful messages for JSON validation" in { + val result = mappingApi.setMapping("my-index", "{invalid json") + result.error.get.message should include("Invalid mapping") + } + } + + "boundary conditions" should { + + "handle minimum valid index name (1 char)" in { + 
// Given
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("a", validMapping)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "reject index name exceeding 255 characters" in {
+        // Given
+        val tooLong = "a" * 256
+
+        // When
+        val result = mappingApi.setMapping(tooLong, validMapping)
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Invalid index")
+      }
+
+      "handle mapping at maximum reasonable size" in {
+        // Given
+        val fields = (1 to 1000).map(i => s""""field$i":{"type":"text"}""").mkString(",")
+        val largeMapping = s"""{"properties":{$fields}}"""
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", largeMapping)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "handle empty properties object" in {
+        // Given
+        val emptyMapping = """{"properties":{}}"""
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", emptyMapping)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "handle mapping with single property" in {
+        // Given
+        val singleProp = """{"properties":{"id":{"type":"keyword"}}}"""
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", singleProp)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "handle deeply nested mapping (10 levels)" in {
+        // Given
+        val deepMapping =
+          """{
+            |  "properties": {
+            |    "l1": {"properties": {
+            |      "l2": {"properties": {
+            |        "l3": {"properties": {
+            |          "l4": {"properties": {
+            |            "l5": {"properties": {
+            |              "l6": {"properties": {
+            |                "l7": {"properties": {
+            |                  "l8": {"properties": {
+            |                    "l9": {"properties": {
+            |                      "l10": {"type": "text"}
+            |                    }}
+            |                  }}
+            |                }}
+            |              }}
+            |            }}
+            |          }}
+            |        }}
+            |      }}
+            |    }}
+            |  }
+            |}""".stripMargin
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", deepMapping)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+    }
+
+    "concurrent operations" should {
+
+      "handle multiple concurrent setMapping calls" in {
+        // Given
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val results = (1 to 5).map(i => mappingApi.setMapping(s"index-$i", validMapping))
+
+        // Then
+        results.foreach(_.isSuccess shouldBe true)
+        verify(mockLogger, times(5)).info(contains("✅ Mapping"))
+      }
+
+      "handle mixed operations concurrently" in {
+        // Given
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+        mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+
+        // When
+        val set1 = mappingApi.setMapping("index1", validMapping)
+        val get1 = mappingApi.getMapping("index1")
+        val set2 = mappingApi.setMapping("index2", validMapping)
+        val get2 = mappingApi.getMapping("index2")
+
+        // Then
+        set1.isSuccess shouldBe true
+        get1.isSuccess shouldBe true
+        set2.isSuccess shouldBe true
+        get2.isSuccess shouldBe true
+      }
+
+      "handle concurrent updateMapping operations" in {
+        // Given
+        mappingApi.executeIndexExistsResult = ElasticSuccess(false)
+        mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val results = (1 to 3).map(i => mappingApi.updateMapping(s"index-$i", validMapping))
+
+        // Then
+        results.foreach(_.isSuccess shouldBe true)
+      }
+    }
+
+    "JSON validation" should {
+
+      "accept valid JSON with properties" in {
+        // Given
+        val validJson = """{"properties":{"name":{"type":"text"}}}"""
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", validJson)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "accept JSON without properties key" in {
+        // Given
+        // Note: validateJson only checks if it's valid JSON, not structure
+        val jsonWithoutProperties = """{"mappings":{"name":{"type":"text"}}}"""
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", jsonWithoutProperties)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "reject malformed JSON" in {
+        // Given
+        val malformedJson = """{"properties":{"name":{"type":"text"}"""
+
+        // When
+        val result = mappingApi.setMapping("my-index", malformedJson)
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Invalid mapping")
+      }
+
+      "accept JSON with extra whitespace" in {
+        // Given
+        val jsonWithSpaces =
+          """{
+            |  "properties": {
+            |    "name": {
+            |      "type": "text"
+            |    }
+            |  }
+            |}""".stripMargin
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", jsonWithSpaces)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "accept JSON with unicode characters" in {
+        // Given
+        val unicodeJson = """{"properties":{"名前":{"type":"text"}}}"""
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", unicodeJson)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "reject JSON with trailing comma" in {
+        // Given
+        val invalidJson = """{"properties":{"name":{"type":"text"},}}"""
+
+        // When
+        val result = mappingApi.setMapping("my-index", invalidJson)
+
+        // Then
+        result.isFailure shouldBe true
+      }
+
+      "accept JSON with escaped characters" in {
+        // Given
+        val escapedJson = """{"properties":{"field\"name":{"type":"text"}}}"""
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", escapedJson)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "reject empty string as mapping" in {
+        // When
+        val result = mappingApi.setMapping("my-index", "")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Invalid mapping")
+      }
+
+      "reject null as mapping" in {
+        // When
+        val result = mappingApi.setMapping("my-index", null)
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Invalid mapping")
+      }
+
+      "accept complex nested JSON" in {
+        // Given
+        val complexJson =
+          """{
+            |  "properties": {
+            |    "user": {
+            |      "type": "nested",
+            |      "properties": {
+            |        "name": {"type": "text"},
+            |        "contacts": {
+            |          "type": "nested",
+            |          "properties": {
+            |            "email": {"type": "keyword"},
+            |            "phone": {"type": "keyword"}
+            |          }
+            |        }
+            |      }
+            |    }
+            |  }
+            |}""".stripMargin
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", complexJson)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+    }
+
+    "migration edge cases" should {
+
+      "handle migration when temp index already exists" in {
+        // Given
+        mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+        mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+        mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings)
+
+        // Creating the temp index fails because it already exists
+        mappingApi.executeCreateIndexResult = ElasticFailure(ElasticError("Index already exists"))
+
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+        mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L)))
+        mappingApi.executeDeleteIndexResult = ElasticSuccess(true)
+        mappingApi.executeOpenIndexResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.updateMapping("my-index", updatedMapping)
+
+        // Then
+        result.isFailure shouldBe true
+      }
+
+      "handle rollback when original index doesn't exist" in {
+        // Given
+        // A plain result var is evaluated once at assignment, so per-call
+        // behaviour needs an override: the index exists for the initial
+        // check but not during rollback
+        var existsCallCount = 0
+        val rollbackApi = new TestMappingApi {
+          override private[client] def executeIndexExists(
+            index: String
+          ): ElasticResult[Boolean] = {
+            existsCallCount += 1
+            if (existsCallCount == 1) ElasticSuccess(true)
+            else ElasticSuccess(false)
+          }
+        }
+        rollbackApi.executeGetMappingResult = ElasticSuccess(validMapping)
+        rollbackApi.executeLoadSettingsResult = ElasticSuccess(validSettings)
+        rollbackApi.executeCreateIndexResult = ElasticSuccess(true)
+        rollbackApi.executeSetMappingResult = ElasticSuccess(true)
+        rollbackApi.executeReindexFunction = (_, _, _) => ElasticFailure(ElasticError("Failed"))
+        rollbackApi.executeDeleteIndexResult = ElasticSuccess(true)
+        rollbackApi.executeOpenIndexResult = ElasticSuccess(true)
+
+        // When
+        val result = rollbackApi.updateMapping("my-index", updatedMapping)
+
+        // Then
+        result.isFailure shouldBe true
+      }
+
+      "handle migration with zero documents" in {
+        // Given
+        mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+        mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+        mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings)
+        mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+        mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(0L)))
+        mappingApi.executeDeleteIndexResult = ElasticSuccess(true)
+        mappingApi.executeOpenIndexResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.updateMapping("my-index", updatedMapping)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "handle partial reindex failure" in {
+        // Given
+        mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+        mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+        mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings)
+        mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // First reindex succeeds, second fails
+        var reindexCallCount = 0
+        mappingApi.executeReindexFunction = { (source, target, refresh) =>
+          reindexCallCount += 1
+          if (reindexCallCount == 1) {
+            ElasticSuccess((true, Some(100L)))
+          } else {
+            ElasticFailure(ElasticError("Second reindex failed"))
+          }
+        }
+
+        mappingApi.executeDeleteIndexResult = ElasticSuccess(true)
+        mappingApi.executeOpenIndexResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.updateMapping("my-index", updatedMapping)
+
+        // Then
+        result.isFailure shouldBe true
+      }
+    }
+
+    "integration with other APIs" should {
+
+      "work with IndicesApi methods" in {
+        // Given
+        mappingApi.executeIndexExistsResult = ElasticSuccess(false)
+        mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.updateMapping("my-index", validMapping)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "work with SettingsApi methods" in {
+        // 
Given
+        mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+        mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+        mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings)
+        mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+        mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L)))
+        mappingApi.executeDeleteIndexResult = ElasticSuccess(true)
+        mappingApi.executeOpenIndexResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.updateMapping("my-index", updatedMapping, validSettings)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+
+      "work with RefreshApi methods" in {
+        // Given
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.setMapping("my-index", validMapping)
+
+        // Then
+        result.isSuccess shouldBe true
+      }
+    }
+
+    "temp index naming" should {
+
+      "generate unique temp index names" in {
+        // Given
+        mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+        mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+        mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings)
+        mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+        mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L)))
+        mappingApi.executeDeleteIndexResult = ElasticSuccess(true)
+        mappingApi.executeOpenIndexResult = ElasticSuccess(true)
+
+        // When - Multiple migrations
+        val result1 = mappingApi.updateMapping("my-index", updatedMapping)
+
+        mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+        val result2 = mappingApi.updateMapping("my-index", updatedMapping)
+
+        // Then - Both should succeed (temp names are unique)
+        result1.isSuccess shouldBe true
+        result2.isSuccess shouldBe true
+      }
+
+      "use correct temp index pattern" in {
+        // Given
+        mappingApi.executeIndexExistsResult = ElasticSuccess(true)
+        mappingApi.executeGetMappingResult = ElasticSuccess(validMapping)
+        mappingApi.executeLoadSettingsResult = ElasticSuccess(validSettings)
+        mappingApi.executeCreateIndexResult = ElasticSuccess(true)
+        mappingApi.executeSetMappingResult = ElasticSuccess(true)
+        mappingApi.executeReindexFunction = (_, _, _) => ElasticSuccess((true, Some(100L)))
+        mappingApi.executeDeleteIndexResult = ElasticSuccess(true)
+        mappingApi.executeOpenIndexResult = ElasticSuccess(true)
+
+        // When
+        val result = mappingApi.updateMapping("my-index", updatedMapping)
+
+        // Then
+        result.isSuccess shouldBe true
+        // Temp index name format: my-index_tmp_<8-char-uuid>
+      }
+    }
+  }
+}
diff --git a/core/src/test/scala/app/softnetwork/elastic/client/RefreshApiSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/RefreshApiSpec.scala
new file mode 100644
index 00000000..75c6f1af
--- /dev/null
+++ b/core/src/test/scala/app/softnetwork/elastic/client/RefreshApiSpec.scala
@@ -0,0 +1,826 @@
+package app.softnetwork.elastic.client
+
+import org.scalatest.wordspec.AnyWordSpec
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.BeforeAndAfterEach
+import org.mockito.MockitoSugar
+import org.mockito.ArgumentMatchersSugar
+import org.slf4j.Logger
+import app.softnetwork.elastic.client.result._
+
+/** Unit tests for RefreshApi
+  */
+class RefreshApiSpec
+    extends AnyWordSpec
+    with Matchers
+    with BeforeAndAfterEach
+    with MockitoSugar
+    with ArgumentMatchersSugar {
+
+  // Mock logger
+  val mockLogger: Logger = mock[Logger]
+
+  
// Concrete implementation for testing + class TestRefreshApi extends RefreshApi { + override protected def logger: Logger = mockLogger + + // Variable to control the behavior of executeRefresh + var executeRefreshResult: ElasticResult[Boolean] = ElasticSuccess(true) + + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = { + executeRefreshResult + } + } + + var refreshApi: TestRefreshApi = _ + + override def beforeEach(): Unit = { + super.beforeEach() + refreshApi = new TestRefreshApi() + reset(mockLogger) + } + + "RefreshApi" should { + + "refresh" should { + + "successfully refresh a valid index" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result shouldBe ElasticSuccess(true) + result.isSuccess shouldBe true + result.get shouldBe true + + verify(mockLogger).debug("Refreshing index: my-index") + verify(mockLogger).info("✅ Index 'my-index' refreshed successfully") + verify(mockLogger, never).error(any[String]) + } + + "return true when refresh succeeds" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("test-index") + + // Then + result.isSuccess shouldBe true + result.toOption shouldBe Some(true) + result.getOrElse(false) shouldBe true + + verify(mockLogger).info(contains("refreshed successfully")) + } + + "return false when refresh returns false" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(false) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result shouldBe ElasticSuccess(false) + result.isSuccess shouldBe true + result.get shouldBe false + + verify(mockLogger).debug("Refreshing index: my-index") + verify(mockLogger).info("✅ Index 'my-index' not refreshed") + verify(mockLogger, never).error(any[String]) + } + + "handle refresh failure with error" in { + // Given + val error = ElasticError( + message = "Connection timeout", + cause = Some(new java.net.SocketTimeoutException("Read timed out")), + statusCode = Some(504), + operation = Some("executeRefresh") + ) + refreshApi.executeRefreshResult = ElasticFailure(error) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result.isFailure shouldBe true + result.error shouldBe Some(error) + + verify(mockLogger).debug("Refreshing index: my-index") + verify(mockLogger).error("❌ Failed to refresh index 'my-index': Connection timeout") + verify(mockLogger, never).info(contains("✅")) + } + + "reject empty index name" in { + // When + val result = refreshApi.refresh("") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.message should include("Index name cannot be empty") + result.error.get.statusCode shouldBe Some(400) + result.error.get.index shouldBe Some("") + result.error.get.operation shouldBe Some("refresh") + + verify(mockLogger, never).debug(any[String]) + verify(mockLogger, never).info(any[String]) + verify(mockLogger, never).error(any[String]) + } + + "reject null index name" in { + // When + val result = refreshApi.refresh(null) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Index name cannot be empty") + result.error.get.statusCode shouldBe Some(400) + } + + "reject index name with only spaces" in { + // When + val result = refreshApi.refresh(" ") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Index name cannot be 
empty")
+      }
+
+      "reject index name with uppercase letters" in {
+        // When
+        val result = refreshApi.refresh("MyIndex")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name must be lowercase")
+        result.error.get.statusCode shouldBe Some(400)
+        result.error.get.index shouldBe Some("MyIndex")
+        result.error.get.operation shouldBe Some("refresh")
+      }
+
+      "reject index name starting with hyphen" in {
+        // When
+        val result = refreshApi.refresh("-my-index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name cannot start with '-', '_', or '+'")
+      }
+
+      "reject index name starting with underscore" in {
+        // When
+        val result = refreshApi.refresh("_my-index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name cannot start with '-', '_', or '+'")
+      }
+
+      "reject index name starting with plus" in {
+        // When
+        val result = refreshApi.refresh("+my-index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name cannot start with '-', '_', or '+'")
+      }
+
+      "reject index name that is a single dot" in {
+        // When
+        val result = refreshApi.refresh(".")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name cannot be '.' or '..'")
+      }
+
+      "reject index name that is double dots" in {
+        // When
+        val result = refreshApi.refresh("..")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name cannot be '.' or '..'")
+      }
+
+      "reject index name with backslash" in {
+        // When
+        val result = refreshApi.refresh("my\\index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name contains invalid characters")
+      }
+
+      "reject index name with forward slash" in {
+        // When
+        val result = refreshApi.refresh("my/index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name contains invalid characters")
+      }
+
+      "reject index name with asterisk" in {
+        // When
+        val result = refreshApi.refresh("my*index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name contains invalid characters")
+      }
+
+      "reject index name with question mark" in {
+        // When
+        val result = refreshApi.refresh("my?index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name contains invalid characters")
+      }
+
+      "reject index name with double quote" in {
+        // When
+        val result = refreshApi.refresh("my\"index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name contains invalid characters")
+      }
+
+      "reject index name with less than" in {
+        // When
+        val result = refreshApi.refresh("my<index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name contains invalid characters")
+      }
+
+      "reject index name with pipe" in {
+        // When
+        val result = refreshApi.refresh("my|index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name contains invalid characters")
+      }
+
+      "reject index name with space" in {
+        // When
+        val result = refreshApi.refresh("my index")
+
+        // Then
+        result.isFailure shouldBe true
+        result.error.get.message should include("Index name contains invalid characters")
+      }
+
+      "reject index name with comma" in {
+        // When
+        val result = refreshApi.refresh("my,index")
+
+ // Then + result.isFailure shouldBe true + result.error.get.message should include("Index name contains invalid characters") + } + + "reject index name with hash" in { + // When + val result = refreshApi.refresh("my#index") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Index name contains invalid characters") + } + + "reject index name longer than 255 characters" in { + // Given + val longName = "a" * 256 + + // When + val result = refreshApi.refresh(longName) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Index name is too long") + result.error.get.message should include("256") + } + + "accept index name with exactly 255 characters" in { + // Given + val maxName = "a" * 255 + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh(maxName) + + // Then + result.isSuccess shouldBe true + verify(mockLogger).debug(s"Refreshing index: $maxName") + } + + "accept valid index names with hyphens" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("my-index-name") + + // Then + result.isSuccess shouldBe true + verify(mockLogger).info(contains("my-index-name")) + } + + "accept valid index names with numbers" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("index123") + + // Then + result.isSuccess shouldBe true + } + + "accept valid index names with dots" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("my.index.name") + + // Then + result.isSuccess shouldBe true + } + + "accept valid index names starting with letter" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("myindex") + + // Then + result.isSuccess shouldBe true + } + + "accept valid index names starting with number" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("123index") + + // Then + result.isSuccess shouldBe true + } + + "handle network timeout error" in { + // Given + val error = ElasticError( + message = "Connection timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + refreshApi.executeRefreshResult = ElasticFailure(error) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + result.error.get.cause.get shouldBe a[java.net.SocketTimeoutException] + + verify(mockLogger).error(contains("Connection timeout")) + } + + "handle connection refused error" in { + // Given + val error = ElasticError( + message = "Connection refused", + cause = Some(new java.net.ConnectException()), + statusCode = Some(503) + ) + refreshApi.executeRefreshResult = ElasticFailure(error) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(503) + + verify(mockLogger).error(contains("Connection refused")) + } + + "handle index not found error" in { + // Given + val error = ElasticError( + message = "Index not found", + statusCode = Some(404), + index = Some("my-index") + ) + refreshApi.executeRefreshResult = ElasticFailure(error) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode 
shouldBe Some(404) + result.error.get.index shouldBe Some("my-index") + + verify(mockLogger).error(contains("Index not found")) + } + + "handle authentication error" in { + // Given + val error = ElasticError( + message = "Authentication failed", + statusCode = Some(401) + ) + refreshApi.executeRefreshResult = ElasticFailure(error) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(401) + + verify(mockLogger).error(contains("Authentication failed")) + } + + "handle authorization error" in { + // Given + val error = ElasticError( + message = "Insufficient permissions", + statusCode = Some(403) + ) + refreshApi.executeRefreshResult = ElasticFailure(error) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(403) + + verify(mockLogger).error(contains("Insufficient permissions")) + } + + "handle server error" in { + // Given + val error = ElasticError( + message = "Internal server error", + statusCode = Some(500) + ) + refreshApi.executeRefreshResult = ElasticFailure(error) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(500) + + verify(mockLogger).error(contains("Internal server error")) + } + + "support ElasticResult operations - map" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("my-index").map { success => + if (success) "Refreshed" else "Not refreshed" + } + + // Then + result shouldBe ElasticSuccess("Refreshed") + result.get shouldBe "Refreshed" + } + + "support ElasticResult operations - flatMap" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("my-index").flatMap { success => + if (success) ElasticSuccess("Operation completed") + else ElasticFailure(ElasticError("Refresh failed")) + } + + // Then + result.isSuccess shouldBe true + result.get shouldBe "Operation completed" + } + + "support ElasticResult operations - getOrElse" in { + // Given - Success case + refreshApi.executeRefreshResult = ElasticSuccess(true) + val result1 = refreshApi.refresh("my-index").getOrElse(false) + + // Given - Failure case + refreshApi.executeRefreshResult = ElasticFailure(ElasticError("Error")) + val result2 = refreshApi.refresh("my-index").getOrElse(false) + + // Then + result1 shouldBe true + result2 shouldBe false + } + + "support ElasticResult operations - toOption" in { + // Given - Success + refreshApi.executeRefreshResult = ElasticSuccess(true) + val option1 = refreshApi.refresh("my-index").toOption + + // Given - Failure + refreshApi.executeRefreshResult = ElasticFailure(ElasticError("Error")) + val option2 = refreshApi.refresh("my-index").toOption + + // Then + option1 shouldBe Some(true) + option2 shouldBe None + } + + "support ElasticResult operations - toEither" in { + // Given - Success + refreshApi.executeRefreshResult = ElasticSuccess(true) + val either1 = refreshApi.refresh("my-index").toEither + + // Given - Failure + val error = ElasticError("Error") + refreshApi.executeRefreshResult = ElasticFailure(error) + val either2 = refreshApi.refresh("my-index").toEither + + // Then + either1 shouldBe Right(true) + either2 shouldBe Left(error) + } + + "support ElasticResult operations - fold" in { + // Given - Success + refreshApi.executeRefreshResult = 
ElasticSuccess(true) + val folded1 = refreshApi + .refresh("my-index") + .fold( + error => s"Error: ${error.message}", + success => if (success) "Success" else "Failed" + ) + + // Given - Failure + refreshApi.executeRefreshResult = ElasticFailure(ElasticError("Network error")) + val folded2 = refreshApi + .refresh("my-index") + .fold( + error => s"Error: ${error.message}", + success => if (success) "Success" else "Failed" + ) + + // Then + folded1 shouldBe "Success" + folded2 shouldBe "Error: Network error" + } + + "support ElasticResult operations - foreach" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + var sideEffect: Option[Boolean] = None + + // When + refreshApi.refresh("my-index").foreach { success => + sideEffect = Some(success) + } + + // Then + sideEffect shouldBe Some(true) + } + + "support ElasticResult operations - filter" in { + // Given - Success with filter passing + refreshApi.executeRefreshResult = ElasticSuccess(true) + val filtered1 = refreshApi.refresh("my-index").filter(_ == true, "Not refreshed") + + // Given - Success with filter failing + refreshApi.executeRefreshResult = ElasticSuccess(false) + val filtered2 = refreshApi.refresh("my-index").filter(_ == true, "Not refreshed") + + // Then + filtered1.isSuccess shouldBe true + filtered1.get shouldBe true + + filtered2.isFailure shouldBe true + filtered2.error.get.message shouldBe "Not refreshed" + } + + "throw exception when calling get on failure" in { + // Given + refreshApi.executeRefreshResult = ElasticFailure(ElasticError("Refresh failed")) + + // When & Then + val exception = intercept[NoSuchElementException] { + refreshApi.refresh("my-index").get + } + + exception.getMessage should include("Refresh failed") + } + + "handle multiple sequential refresh calls" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result1 = refreshApi.refresh("index1") + val result2 = refreshApi.refresh("index2") + val result3 = refreshApi.refresh("index3") + + // Then + result1.isSuccess shouldBe true + result2.isSuccess shouldBe true + result3.isSuccess shouldBe true + + verify(mockLogger).debug("Refreshing index: index1") + verify(mockLogger).debug("Refreshing index: index2") + verify(mockLogger).debug("Refreshing index: index3") + verify(mockLogger, times(3)).info(contains("refreshed successfully")) + } + + "handle mixed success and failure scenarios" in { + // Given - First call succeeds + refreshApi.executeRefreshResult = ElasticSuccess(true) + val result1 = refreshApi.refresh("index1") + + // Given - Second call fails + refreshApi.executeRefreshResult = ElasticFailure(ElasticError("Error")) + val result2 = refreshApi.refresh("index2") + + // Given - Third call succeeds + refreshApi.executeRefreshResult = ElasticSuccess(false) + val result3 = refreshApi.refresh("index3") + + // Then + result1.isSuccess shouldBe true + result1.get shouldBe true + + result2.isFailure shouldBe true + + result3.isSuccess shouldBe true + result3.get shouldBe false + + verify(mockLogger, times(1)).info(contains("refreshed successfully")) + verify(mockLogger, times(1)).error(contains("Failed to refresh")) + verify(mockLogger, times(1)).info(contains("not refreshed")) + } + + "trim index name before validation" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh(" my-index ") + + // Then + result.isSuccess shouldBe true + verify(mockLogger).debug("Refreshing index: my-index ") + } + + "handle error with full context 
information" in { + // Given + val error = ElasticError( + message = "Refresh timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504), + index = Some("my-index"), + operation = Some("executeRefresh") + ) + refreshApi.executeRefreshResult = ElasticFailure(error) + + // When + val result = refreshApi.refresh("my-index") + + // Then + result.error.get.fullMessage should include("executeRefresh") + result.error.get.fullMessage should include("my-index") + result.error.get.fullMessage should include("504") + result.error.get.fullMessage should include("Refresh timeout") + } + + "not call executeRefresh when validation fails" in { + // Given + var executeCalled = false + val validatingApi = new RefreshApi { + override protected def logger: Logger = mockLogger + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = { + executeCalled = true + ElasticSuccess(true) + } + } + + // When + validatingApi.refresh("INVALID") + + // Then + executeCalled shouldBe false + verify(mockLogger, never).debug(any[String]) + verify(mockLogger, never).info(any[String]) + } + + "call executeRefresh only after successful validation" in { + // Given + var executeCalled = false + val validatingApi = new RefreshApi { + override protected def logger: Logger = mockLogger + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = { + executeCalled = true + ElasticSuccess(true) + } + } + + // When + validatingApi.refresh("valid-index") + + // Then + executeCalled shouldBe true + verify(mockLogger).debug("Refreshing index: valid-index") + } + } + + "ElasticResult integration" should { + + "work with map transformation" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi + .refresh("my-index") + .map(success => if (success) 1 else 0) + .map(_ * 100) + + // Then + result shouldBe ElasticSuccess(100) + } + + "work with flatMap composition" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = refreshApi.refresh("my-index").flatMap { success => + if (success) ElasticSuccess("Index is fresh") + else ElasticFailure(ElasticError("Index not refreshed")) + } + + // Then + result.isSuccess shouldBe true + result.get shouldBe "Index is fresh" + } + + "propagate errors through transformations" in { + // Given + refreshApi.executeRefreshResult = ElasticFailure(ElasticError("Network error")) + + // When + val result = refreshApi + .refresh("my-index") + .map(!_) + .flatMap(v => ElasticSuccess(s"Result: $v")) + .filter(_ => true) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Network error" + } + + "work with for-comprehension" in { + // Given + refreshApi.executeRefreshResult = ElasticSuccess(true) + + // When + val result = for { + refreshed <- refreshApi.refresh("index1") + message = if (refreshed) "OK" else "KO" + } yield message + + // Then + result shouldBe ElasticSuccess("OK") + } + } + } +} diff --git a/core/src/test/scala/app/softnetwork/elastic/client/SettingsApiSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/SettingsApiSpec.scala new file mode 100644 index 00000000..e6a86dba --- /dev/null +++ b/core/src/test/scala/app/softnetwork/elastic/client/SettingsApiSpec.scala @@ -0,0 +1,1509 @@ +package app.softnetwork.elastic.client + +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.BeforeAndAfterEach +import 
org.mockito.MockitoSugar +import org.mockito.ArgumentMatchersSugar +import org.slf4j.Logger +import app.softnetwork.elastic.client.result._ +import com.google.gson.JsonParser + +/** Unit tests for SettingsApi Coverage target: 80%+ Using mockito-scala 1.17.12 + */ +class SettingsApiSpec + extends AnyWordSpec + with Matchers + with BeforeAndAfterEach + with MockitoSugar + with ArgumentMatchersSugar { + + // Mock logger + val mockLogger: Logger = mock[Logger] + + // Concrete implementation for testing + class TestSettingsApi extends SettingsApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + // Control variables + var executeUpdateSettingsResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeLoadSettingsResult: ElasticResult[String] = ElasticSuccess( + """{"my-index":{"settings":{"index":{"number_of_shards":"1"}}}}""" + ) + var executeCloseIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + var executeOpenIndexResult: ElasticResult[Boolean] = ElasticSuccess(true) + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = { + executeUpdateSettingsResult + } + + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = { + executeLoadSettingsResult + } + + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + executeCloseIndexResult + } + + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = { + executeOpenIndexResult + } + + // Other required methods + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? 
+ } + + var settingsApi: TestSettingsApi = _ + + override def beforeEach(): Unit = { + super.beforeEach() + settingsApi = new TestSettingsApi() + reset(mockLogger) + } + + "SettingsApi" should { + + "toggleRefresh" should { + + "enable refresh interval when enable is true" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.toggleRefresh("my-index", enable = true) + + // Then + result.isSuccess shouldBe true + + verify(mockLogger).debug(contains("refresh_interval")) + verify(mockLogger).debug(contains("1s")) + } + + "disable refresh interval when enable is false" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.toggleRefresh("my-index", enable = false) + + // Then + result.isSuccess shouldBe true + + verify(mockLogger).debug(contains("refresh_interval")) + verify(mockLogger).debug(contains("-1")) + } + + "reject invalid index name" in { + // When + val result = settingsApi.toggleRefresh("INVALID", enable = true) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.operation shouldBe Some("updateSettings") + } + + "handle failure when closing index fails" in { + // Given + val error = ElasticError("Cannot close index", statusCode = Some(500)) + settingsApi.executeCloseIndexResult = ElasticFailure(error) + + // When + val result = settingsApi.toggleRefresh("my-index", enable = true) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Cannot close index" + + verify(mockLogger).error(contains("Closing index my-index failed")) + } + + "handle failure when updating settings fails" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + val error = ElasticError("Update failed", statusCode = Some(500)) + settingsApi.executeUpdateSettingsResult = ElasticFailure(error) + + // When + val result = settingsApi.toggleRefresh("my-index", enable = true) + + // Then + result.isFailure shouldBe true + + verify(mockLogger).error(contains("Updating settings for index 'my-index' failed")) + } + + "handle failure when opening index fails after successful update" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + val error = ElasticError("Cannot open index", statusCode = Some(500)) + settingsApi.executeOpenIndexResult = ElasticFailure(error) + + // When + val result = settingsApi.toggleRefresh("my-index", enable = true) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Cannot open index" + + verify(mockLogger).info("✅ Updating settings for index 'my-index' succeeded") + } + } + + "setReplicas" should { + + "successfully set number of replicas" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.setReplicas("my-index", 2) + + // Then + result.isSuccess shouldBe true + + verify(mockLogger).debug(contains("number_of_replicas")) + verify(mockLogger).debug(contains("2")) + } + + "accept zero replicas" in { + // 
Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.setReplicas("my-index", 0) + + // Then + result.isSuccess shouldBe true + + verify(mockLogger).debug(contains("0")) + } + + "accept multiple replicas" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.setReplicas("my-index", 5) + + // Then + result.isSuccess shouldBe true + + verify(mockLogger).debug(contains("5")) + } + + "reject invalid index name" in { + // When + val result = settingsApi.setReplicas("INVALID", 2) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + } + + "handle negative replicas value" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When - Note: validation of replica count is ES responsibility + val result = settingsApi.setReplicas("my-index", -1) + + // Then - API accepts it, ES will reject + result.isSuccess shouldBe true + verify(mockLogger).debug(contains("-1")) + } + + "handle failure during update" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + val error = ElasticError("Update failed") + settingsApi.executeUpdateSettingsResult = ElasticFailure(error) + + // When + val result = settingsApi.setReplicas("my-index", 2) + + // Then + result.isFailure shouldBe true + + verify(mockLogger).error(contains("Updating settings for index 'my-index' failed")) + } + } + + "updateSettings" should { + + "successfully update settings with default settings" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isSuccess shouldBe true + + verify(mockLogger).debug(contains("🔧 Updating settings for index my-index")) + verify(mockLogger).info("✅ Updating settings for index 'my-index' succeeded") + } + + "successfully update settings with custom settings" in { + // Given + val customSettings = """{"index": {"max_result_window": 20000}}""" + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.updateSettings("my-index", customSettings) + + // Then + result.isSuccess shouldBe true + + verify(mockLogger).debug(contains(customSettings)) + verify(mockLogger).info(contains("succeeded")) + } + + "reject invalid index name" in { + // When + val result = settingsApi.updateSettings("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.message should include("lowercase") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("updateSettings") + + verify(mockLogger, never).debug(any[String]) + } + + "reject empty index name" in { + // When + val result = settingsApi.updateSettings("") + + // Then + result.isFailure shouldBe true + 
result.error.get.message should include("cannot be empty") + } + + "reject invalid JSON settings" in { + // Given + val invalidJson = """{"index": invalid}""" + + // When + val result = settingsApi.updateSettings("my-index", invalidJson) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid settings") + result.error.get.message should include("Invalid JSON") + result.error.get.statusCode shouldBe Some(400) + result.error.get.operation shouldBe Some("updateSettings") + } + + "reject empty settings" in { + // When + val result = settingsApi.updateSettings("my-index", "") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid settings") + result.error.get.message should include("cannot be empty") + } + + "reject null settings" in { + // When + val result = settingsApi.updateSettings("my-index", null) + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid settings") + } + + "fail when closeIndex fails" in { + // Given + val error = ElasticError("Cannot close index", statusCode = Some(500)) + settingsApi.executeCloseIndexResult = ElasticFailure(error) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Cannot close index" + + // closeIndex logs info "Closing index..." before failing + verify(mockLogger).info(contains("Closing index 'my-index'")) + verify(mockLogger).error(contains("Closing index my-index failed")) + verify(mockLogger).error(contains("settings for index 'my-index' will not be updated")) + } + + "fail when executeUpdateSettings fails" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + val error = ElasticError("Update failed", statusCode = Some(500)) + settingsApi.executeUpdateSettingsResult = ElasticFailure(error) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Update failed" + + // closeIndex logs info messages + verify(mockLogger).info(contains("Closing index 'my-index'")) + verify(mockLogger).info(contains("✅ Index 'my-index' closed successfully")) + verify(mockLogger).error("❌ Updating settings for index 'my-index' failed: Update failed") + } + + "fail when executeUpdateSettings returns false" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(false) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Updating settings for index 'my-index' failed") + result.error.get.operation shouldBe Some("updateSettings") + result.error.get.index shouldBe Some("my-index") + } + + "fail when openIndex fails after successful update" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + val error = ElasticError("Cannot open index", statusCode = Some(500)) + settingsApi.executeOpenIndexResult = ElasticFailure(error) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Cannot open index" + + verify(mockLogger).info("✅ Updating settings for index 'my-index' succeeded") + } + + "validate index name before settings" in { + // Given + val invalidJson = """invalid""" + + // When + val result = 
settingsApi.updateSettings("INVALID", invalidJson) + + // Then + result.error.get.message should include("Invalid index") + result.error.get.message should not include "Invalid settings" + } + + "handle network timeout during close" in { + // Given + val error = ElasticError( + "Connection timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + settingsApi.executeCloseIndexResult = ElasticFailure(error) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + } + + "handle authentication error" in { + // Given + val error = ElasticError("Authentication failed", statusCode = Some(401)) + settingsApi.executeCloseIndexResult = ElasticFailure(error) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(401) + } + } + + "loadSettings" should { + + "successfully load settings for existing index" in { + // Given + val jsonResponse = + """{"my-index":{"settings":{"index":{"number_of_shards":"1","number_of_replicas":"2"}}}}""" + settingsApi.executeLoadSettingsResult = ElasticSuccess(jsonResponse) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isSuccess shouldBe true + result.get should include("number_of_shards") + result.get should include("number_of_replicas") + + verify(mockLogger).debug("🔍 Loading settings for index my-index") + } + + "extract only index settings from response" in { + // Given + val jsonResponse = """{"my-index":{"settings":{"index":{"number_of_shards":"3"}}}}""" + settingsApi.executeLoadSettingsResult = ElasticSuccess(jsonResponse) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isSuccess shouldBe true + val parsedResult = new JsonParser().parse(result.get).getAsJsonObject + parsedResult.has("number_of_shards") shouldBe true + parsedResult.get("number_of_shards").getAsString shouldBe "3" + } + + "reject invalid index name" in { + // When + val result = settingsApi.loadSettings("INVALID") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Invalid index") + result.error.get.operation shouldBe Some("updateSettings") + + verify(mockLogger, never).debug(contains("🔍")) + } + + "reject empty index name" in { + // When + val result = settingsApi.loadSettings("") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("cannot be empty") + } + + "fail when executeLoadSettings fails" in { + // Given + val error = ElasticError("Load failed", statusCode = Some(500)) + settingsApi.executeLoadSettingsResult = ElasticFailure(error) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Load failed" + } + + "fail when response contains invalid JSON" in { + // Given + settingsApi.executeLoadSettingsResult = ElasticSuccess("invalid json {") + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.operation shouldBe Some("loadSettings") + + verify(mockLogger).error(contains("Failed to parse JSON settings")) + } + + "fail when index not found in response" in { + // Given + val jsonResponse = """{"other-index":{"settings":{"index":{}}}}""" + settingsApi.executeLoadSettingsResult = ElasticSuccess(jsonResponse) + + // When + val result = 
settingsApi.loadSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("Index 'my-index' not found in the loaded settings") + result.error.get.operation shouldBe Some("loadSettings") + result.error.get.index shouldBe Some("my-index") + + verify(mockLogger).error(contains("Index 'my-index' not found")) + } + + "fail when response is empty JSON object" in { + // Given + settingsApi.executeLoadSettingsResult = ElasticSuccess("{}") + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message should include("not found in the loaded settings") + } + + "handle complex settings structure" in { + // Given + val complexJson = + """{ + | "my-index": { + | "settings": { + | "index": { + | "number_of_shards": "5", + | "number_of_replicas": "2", + | "refresh_interval": "1s", + | "max_result_window": "10000", + | "analysis": { + | "analyzer": { + | "custom": { + | "type": "standard" + | } + | } + | } + | } + | } + | } + |}""".stripMargin + settingsApi.executeLoadSettingsResult = ElasticSuccess(complexJson) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isSuccess shouldBe true + result.get should include("number_of_shards") + result.get should include("analysis") + result.get should include("custom") + } + + "handle settings with nested objects" in { + // Given + val jsonResponse = + """{ + | "my-index": { + | "settings": { + | "index": { + | "mapping": { + | "total_fields": { + | "limit": "2000" + | } + | } + | } + | } + | } + |}""".stripMargin + settingsApi.executeLoadSettingsResult = ElasticSuccess(jsonResponse) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isSuccess shouldBe true + result.get should include("mapping") + result.get should include("total_fields") + } + + "handle network error" in { + // Given + val error = ElasticError( + "Connection timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + settingsApi.executeLoadSettingsResult = ElasticFailure(error) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + } + + "handle index not found error" in { + // Given + val error = ElasticError("Index not found", statusCode = Some(404)) + settingsApi.executeLoadSettingsResult = ElasticFailure(error) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(404) + } + } + + "workflow scenarios" should { + + "successfully toggle refresh, update replicas, and load settings" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + settingsApi.executeLoadSettingsResult = ElasticSuccess( + """{"my-index":{"settings":{"index":{"number_of_replicas":"2","refresh_interval":"1s"}}}}""" + ) + + // When + val toggleResult = settingsApi.toggleRefresh("my-index", enable = true) + val replicasResult = settingsApi.setReplicas("my-index", 2) + val loadResult = settingsApi.loadSettings("my-index") + + // Then + toggleResult.isSuccess shouldBe true + replicasResult.isSuccess shouldBe true + loadResult.isSuccess shouldBe true + loadResult.get should include("number_of_replicas") + loadResult.get should include("refresh_interval") + } + + "handle 
partial failure in workflow" in { + // Given - First operation succeeds + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + val result1 = settingsApi.toggleRefresh("my-index", enable = true) + + // Given - Second operation fails + settingsApi.executeCloseIndexResult = ElasticFailure(ElasticError("Cannot close")) + val result2 = settingsApi.setReplicas("my-index", 2) + + // Then + result1.isSuccess shouldBe true + result2.isFailure shouldBe true + } + + "handle close-update-open workflow correctly" in { + // Given + var closeCalled = false + var updateCalled = false + var openCalled = false + + val workflowApi = new SettingsApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + closeCalled = true + ElasticSuccess(true) + } + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = { + updateCalled = true + closeCalled shouldBe true // Close must be called first + ElasticSuccess(true) + } + + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = { + openCalled = true + updateCalled shouldBe true // Update must be called before open + ElasticSuccess(true) + } + + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = + ??? + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? + } + + // When + workflowApi.updateSettings("my-index") + + // Then + closeCalled shouldBe true + updateCalled shouldBe true + openCalled shouldBe true + } + + "not call update or open if close fails" in { + // Given + var updateCalled = false + var openCalled = false + + val workflowApi = new SettingsApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + ElasticFailure(ElasticError("Close failed")) + } + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = { + updateCalled = true + ElasticSuccess(true) + } + + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = { + openCalled = true + ElasticSuccess(true) + } + + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = + ??? + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = + ??? 
+ override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? + } + + // When + workflowApi.updateSettings("my-index") + + // Then + updateCalled shouldBe false + openCalled shouldBe false + } + + "not call open if update fails" in { + // Given + var openCalled = false + + val workflowApi = new SettingsApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + ElasticSuccess(true) + } + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = { + ElasticFailure(ElasticError("Update failed")) + } + + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = { + openCalled = true + ElasticSuccess(true) + } + + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = + ??? + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? + } + + // When + workflowApi.updateSettings("my-index") + + // Then + openCalled shouldBe false + } + } + + "ElasticResult integration" should { + + "work with map transformation" in { + // Given + settingsApi.executeLoadSettingsResult = ElasticSuccess( + """{"my-index":{"settings":{"index":{"number_of_shards":"1"}}}}""" + ) + + // When + val result = settingsApi.loadSettings("my-index").map { settings => + settings.length + } + + // Then + result.isSuccess shouldBe true + result.get should be > 0 + } + + "work with flatMap composition" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + settingsApi.executeLoadSettingsResult = ElasticSuccess( + """{"my-index":{"settings":{"index":{"number_of_shards":"1"}}}}""" + ) + + // When + val result = settingsApi.updateSettings("my-index").flatMap { _ => + settingsApi.loadSettings("my-index") + } + + // Then + result.isSuccess shouldBe true + result.get should include("number_of_shards") + } + + "work with for-comprehension" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = for { + _ <- settingsApi.toggleRefresh("my-index", enable = true) + _ <- settingsApi.setReplicas("my-index", 2) + } yield "Success" + + // Then + result shouldBe ElasticSuccess("Success") + } + + "propagate errors through transformations" in { + // Given + val error = ElasticError("Load failed") + settingsApi.executeLoadSettingsResult = ElasticFailure(error) + + // When + val result = settingsApi + .loadSettings("my-index") + .map(_.toUpperCase) + .flatMap(s => ElasticSuccess(s.length)) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Load failed" + } + } + + "edge cases" should { + + "handle very large replica count" in { + 
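+ // As with negative values, replica bounds are Elasticsearch's responsibility: the client forwards the value as-is.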
// Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.setReplicas("my-index", 1000) + + // Then + result.isSuccess shouldBe true + verify(mockLogger).debug(contains("1000")) + } + + "handle index name with maximum length" in { + // Given + val maxName = "a" * 255 + settingsApi.executeLoadSettingsResult = ElasticSuccess( + s"""{"$maxName":{"settings":{"index":{"number_of_shards":"1"}}}}""" + ) + + // When + val result = settingsApi.loadSettings(maxName) + + // Then + result.isSuccess shouldBe true + } + + "handle settings with special characters in JSON" in { + // Given + val settingsWithSpecialChars = + """{"index": {"routing": {"allocation": {"include": {"_tier": "data_hot"}}}}}""" + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.updateSettings("my-index", settingsWithSpecialChars) + + // Then + result.isSuccess shouldBe true + } + + "handle very long settings JSON" in { + // Given + val longSettings = + """{"index": {""" + (1 to 100).map(i => s""""field$i": "value$i"""").mkString(",") + "}}" + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result = settingsApi.updateSettings("my-index", longSettings) + + // Then + result.isSuccess shouldBe true + } + + "handle loadSettings response with extra whitespace" in { + // Given + val jsonWithWhitespace = + """{ + | "my-index" : { + | "settings" : { + | "index" : { + | "number_of_shards" : "1" + | } + | } + | } + |}""".stripMargin + settingsApi.executeLoadSettingsResult = ElasticSuccess(jsonWithWhitespace) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isSuccess shouldBe true + } + + "handle multiple toggleRefresh calls" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result1 = settingsApi.toggleRefresh("my-index", enable = true) + val result2 = settingsApi.toggleRefresh("my-index", enable = false) + val result3 = settingsApi.toggleRefresh("my-index", enable = true) + + // Then + result1.isSuccess shouldBe true + result2.isSuccess shouldBe true + result3.isSuccess shouldBe true + + verify(mockLogger, times(3)).info(contains("succeeded")) + } + + "handle setReplicas with same value multiple times" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val result1 = settingsApi.setReplicas("my-index", 2) + val result2 = settingsApi.setReplicas("my-index", 2) + + // Then + result1.isSuccess shouldBe true + result2.isSuccess shouldBe true + } + } + + "error handling" should { + + "handle all validation errors for updateSettings" in { + // Invalid index + val result1 = settingsApi.updateSettings("INVALID") + result1.isFailure shouldBe true + + // Invalid settings + val result2 = settingsApi.updateSettings("my-index", "invalid") + result2.isFailure shouldBe true + + // Empty index + val 
result3 = settingsApi.updateSettings("") + result3.isFailure shouldBe true + + // Empty settings + val result4 = settingsApi.updateSettings("my-index", "") + result4.isFailure shouldBe true + } + + "handle all validation errors for loadSettings" in { + // Invalid index + val result1 = settingsApi.loadSettings("INVALID") + result1.isFailure shouldBe true + + // Empty index + val result2 = settingsApi.loadSettings("") + result2.isFailure shouldBe true + + // Null index + val result3 = settingsApi.loadSettings(null) + result3.isFailure shouldBe true + } + + "handle authentication error in updateSettings" in { + // Given + val error = ElasticError("Authentication failed", statusCode = Some(401)) + settingsApi.executeCloseIndexResult = ElasticFailure(error) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(401) + } + + "handle authorization error in loadSettings" in { + // Given + val error = ElasticError("Insufficient permissions", statusCode = Some(403)) + settingsApi.executeLoadSettingsResult = ElasticFailure(error) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(403) + } + + "handle timeout error in updateSettings" in { + // Given + val error = ElasticError( + "Request timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504) + ) + settingsApi.executeCloseIndexResult = ElasticFailure(error) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + result.error.get.cause.get shouldBe a[java.net.SocketTimeoutException] + } + + "handle server error in loadSettings" in { + // Given + val error = ElasticError("Internal server error", statusCode = Some(500)) + settingsApi.executeLoadSettingsResult = ElasticFailure(error) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(500) + } + + "preserve error context through workflow" in { + // Given + val error = ElasticError( + message = "Close failed", + cause = Some(new RuntimeException("Root cause")), + statusCode = Some(500), + index = Some("my-index"), + operation = Some("closeIndex") + ) + settingsApi.executeCloseIndexResult = ElasticFailure(error) + + // When + val result = settingsApi.updateSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Close failed" + result.error.get.cause shouldBe defined + result.error.get.statusCode shouldBe Some(500) + result.error.get.index shouldBe Some("my-index") + } + } + + "logging behavior" should { + + "log debug message with emoji for updateSettings" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + settingsApi.updateSettings("my-index") + + // Then + verify(mockLogger).debug(contains("🔧")) + verify(mockLogger).debug(contains("Updating settings for index my-index")) + } + + "log debug message with emoji for loadSettings" in { + // Given + settingsApi.executeLoadSettingsResult = ElasticSuccess( + """{"my-index":{"settings":{"index":{}}}}""" + ) + + // When + settingsApi.loadSettings("my-index") + + // Then + verify(mockLogger).debug(contains("🔍")) + 
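+ // contains(...) is a substring matcher, so the emoji marker and the human-readable message are asserted independently.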
verify(mockLogger).debug(contains("Loading settings for index my-index")) + } + + "log info message with emoji for successful update" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + settingsApi.updateSettings("my-index") + + // Then + // Multiple info logs with ✅: close, update, open + verify(mockLogger, atLeast(1)).info(contains("✅")) + verify(mockLogger).info(contains("succeeded")) + + // Or more specifically: + verify(mockLogger).info("✅ Index 'my-index' closed successfully") + verify(mockLogger).info("✅ Updating settings for index 'my-index' succeeded") + verify(mockLogger).info("✅ Index 'my-index' opened successfully") + } + + "log error message with emoji for failed close" in { + // Given + settingsApi.executeCloseIndexResult = ElasticFailure(ElasticError("Failed")) + + // When + settingsApi.updateSettings("my-index") + + // Then + // Multiple error logs with ❌: from closeIndex and updateSettings + verify(mockLogger, atLeast(1)).error(contains("❌")) + verify(mockLogger).error(contains("Closing index my-index failed")) + verify(mockLogger).error(contains("settings for index 'my-index' will not be updated")) + } + + "log error message with emoji for failed update" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticFailure(ElasticError("Failed")) + + // When + settingsApi.updateSettings("my-index") + + // Then + verify(mockLogger).error(contains("❌")) + verify(mockLogger).error(contains("Updating settings for index 'my-index' failed")) + } + + "log error message with emoji for parse failure" in { + // Given + settingsApi.executeLoadSettingsResult = ElasticSuccess("invalid json") + + // When + settingsApi.loadSettings("my-index") + + // Then + verify(mockLogger).error(contains("❌")) + verify(mockLogger).error(contains("Failed to parse JSON settings")) + } + + "log error message with emoji for index not found" in { + // Given + settingsApi.executeLoadSettingsResult = ElasticSuccess("{}") + + // When + settingsApi.loadSettings("my-index") + + // Then + verify(mockLogger).error(contains("❌")) + verify(mockLogger).error(contains("Index 'my-index' not found")) + } + + "not log anything for validation failures" in { + // When + settingsApi.updateSettings("INVALID") + + // Then + verify(mockLogger, never).debug(any[String]) + verify(mockLogger, never).info(any[String]) + verify(mockLogger, never).error(any[String]) + } + } + + "JSON parsing" should { + + "correctly parse nested index settings" in { + // Given + val jsonResponse = + """{ + | "my-index": { + | "settings": { + | "index": { + | "creation_date": "1234567890", + | "number_of_shards": "1", + | "number_of_replicas": "0", + | "uuid": "abc123", + | "version": { + | "created": "7100099" + | } + | } + | } + | } + |}""".stripMargin + settingsApi.executeLoadSettingsResult = ElasticSuccess(jsonResponse) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isSuccess shouldBe true + result.get should include("creation_date") + result.get should include("uuid") + result.get should include("version") + } + + "handle null values in JSON" in { + // Given + val jsonResponse = """{"my-index":{"settings":{"index":{"field":null}}}}""" + settingsApi.executeLoadSettingsResult = ElasticSuccess(jsonResponse) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + 
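+ // Gson keeps the key as an explicit JsonNull instead of dropping it, so parsing still succeeds.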
result.isSuccess shouldBe true + } + + "handle empty index settings object" in { + // Given + val jsonResponse = """{"my-index":{"settings":{"index":{}}}}""" + settingsApi.executeLoadSettingsResult = ElasticSuccess(jsonResponse) + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isSuccess shouldBe true + result.get shouldBe "{}" + } + + "handle malformed JSON gracefully" in { + // Given + settingsApi.executeLoadSettingsResult = + ElasticSuccess("""{"my-index":{"settings":{"index":""") + + // When + val result = settingsApi.loadSettings("my-index") + + // Then + result.isFailure shouldBe true + result.error.get.operation shouldBe Some("loadSettings") + verify(mockLogger).error(contains("Failed to parse JSON")) + } + } + + "concurrent operations" should { + + "handle multiple concurrent updateSettings calls" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + + // When + val results = (1 to 3).map(i => settingsApi.updateSettings(s"index-$i")) + + // Then + results.foreach(_.isSuccess shouldBe true) + verify(mockLogger, times(3)).info(contains("succeeded")) + } + + "handle mixed operations concurrently" in { + // Given + settingsApi.executeCloseIndexResult = ElasticSuccess(true) + settingsApi.executeUpdateSettingsResult = ElasticSuccess(true) + settingsApi.executeOpenIndexResult = ElasticSuccess(true) + settingsApi.executeLoadSettingsResult = ElasticSuccess( + """{"my-index":{"settings":{"index":{}}}}""" + ) + + // When + val toggle = settingsApi.toggleRefresh("my-index", enable = true) + val replicas = settingsApi.setReplicas("my-index", 2) + val load = settingsApi.loadSettings("my-index") + + // Then + toggle.isSuccess shouldBe true + replicas.isSuccess shouldBe true + load.isSuccess shouldBe true + } + } + + "validation order" should { + + "validate index name before calling executeCloseIndex" in { + // Given + var closeCalled = false + val validatingApi = new SettingsApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + closeCalled = true + ElasticSuccess(true) + } + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = + ??? + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? 
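+ // closeCalled is the sentinel here: it must remain false whenever validation rejects the index name.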
+ } + + // When + validatingApi.updateSettings("INVALID") + + // Then + closeCalled shouldBe false + } + + "validate settings after index name" in { + // Given + var closeCalled = false + val validatingApi = new SettingsApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + closeCalled = true + ElasticSuccess(true) + } + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = + ??? + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? + } + + // When + validatingApi.updateSettings("valid-index", "invalid json") + + // Then + closeCalled shouldBe false + } + + "validate index name before calling executeLoadSettings" in { + // Given + var loadCalled = false + val validatingApi = new SettingsApi with IndicesApi with RefreshApi { + override protected def logger: Logger = mockLogger + + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = { + loadCalled = true + ElasticSuccess("{}") + } + + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = ??? + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = ??? + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = ??? + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = + ??? + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = ??? 
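+ // Same sentinel pattern: loadCalled must stay false because validation fails before the hook is invoked.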
+ } + + // When + validatingApi.loadSettings("INVALID") + + // Then + loadCalled shouldBe false + } + } + } +} diff --git a/core/src/test/scala/app/softnetwork/elastic/client/VersionApiSpec.scala b/core/src/test/scala/app/softnetwork/elastic/client/VersionApiSpec.scala new file mode 100644 index 00000000..41a9970c --- /dev/null +++ b/core/src/test/scala/app/softnetwork/elastic/client/VersionApiSpec.scala @@ -0,0 +1,519 @@ +package app.softnetwork.elastic.client + +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.BeforeAndAfterEach +import org.mockito.MockitoSugar +import org.mockito.ArgumentMatchersSugar +import org.slf4j.Logger +import app.softnetwork.elastic.client.result._ + +/** Unit tests for VersionApi + */ +class VersionApiSpec + extends AnyWordSpec + with Matchers + with BeforeAndAfterEach + with MockitoSugar + with ArgumentMatchersSugar { + + // Mock logger + val mockLogger: Logger = mock[Logger] + + // Concrete implementation for testing + class TestVersionApi extends VersionApi with SerializationApi { + override protected def logger: Logger = mockLogger + + // Variable to control the behavior of executeVersion + var executeVersionResult: ElasticResult[String] = ElasticSuccess("8.11.0") + + override private[client] def executeVersion(): ElasticResult[String] = { + executeVersionResult + } + } + + var versionApi: TestVersionApi = _ + + override def beforeEach(): Unit = { + super.beforeEach() + versionApi = new TestVersionApi() + reset(mockLogger) + } + + "VersionApi" should { + + "version" should { + + "return version on first call and cache it" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + + // When + val result1 = versionApi.version + val result2 = versionApi.version + + // Then + result1 shouldBe ElasticSuccess("8.11.0") + result2 shouldBe ElasticSuccess("8.11.0") + result1.isSuccess shouldBe true + result1.toOption shouldBe Some("8.11.0") + + // Should log success only once (first call) + verify(mockLogger, times(1)).info(contains("✅ Elasticsearch version: 8.11.0")) + verify(mockLogger, never).error(any[String]) + } + + "cache version after successful first call" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("7.17.0") + + // When - First call + val result1 = versionApi.version + + // Change the result (simulating that executeVersion would return something different) + versionApi.executeVersionResult = ElasticSuccess("8.0.0") + + // Second call should still return cached version + val result2 = versionApi.version + + // Then + result1 shouldBe ElasticSuccess("7.17.0") + result2 shouldBe ElasticSuccess("7.17.0") // Still cached! 
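+ // The stubbed executeVersion now returns 8.0.0, yet the memoized 7.17.0 is served: proof the cache is in effect.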
+ + verify(mockLogger, times(1)).info(contains("7.17.0")) + } + + "return failure when executeVersion fails" in { + // Given + val error = ElasticError( + message = "Connection refused", + cause = Some(new java.net.ConnectException("Connection refused")), + statusCode = Some(503), + operation = Some("executeVersion") + ) + versionApi.executeVersionResult = ElasticFailure(error) + + // When + val result = versionApi.version + + // Then + result shouldBe ElasticFailure(error) + result.isSuccess shouldBe false + result.isFailure shouldBe true + result.toOption shouldBe None + result.error shouldBe Some(error) + + verify(mockLogger, times(1)).error( + contains("❌ Failed to get Elasticsearch version: Connection refused") + ) + verify(mockLogger, never).info(any[String]) + } + + "not cache version when executeVersion fails" in { + // Given - First call fails + versionApi.executeVersionResult = ElasticFailure( + ElasticError("Connection timeout", statusCode = Some(503)) + ) + + // When - First call + val result1 = versionApi.version + + // Given - Second call succeeds + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + + // When - Second call + val result2 = versionApi.version + + // Then + result1.isFailure shouldBe true + result2 shouldBe ElasticSuccess("8.11.0") + + verify(mockLogger, times(1)).error(contains("Connection timeout")) + verify(mockLogger, times(1)).info(contains("8.11.0")) + } + + "handle different version formats" in { + // Given + val versions = Seq( + "7.17.0", + "8.0.0", + "8.11.0", + "8.11.1-SNAPSHOT" + ) + + versions.foreach { version => + // Reset API for each test + versionApi = new TestVersionApi() + versionApi.executeVersionResult = ElasticSuccess(version) + + // When + val result = versionApi.version + + // Then + result shouldBe ElasticSuccess(version) + result.get shouldBe version + } + } + + "handle authentication error" in { + // Given + val error = ElasticError( + message = "Authentication failed", + statusCode = Some(401), + operation = Some("executeVersion") + ) + versionApi.executeVersionResult = ElasticFailure(error) + + // When + val result = versionApi.version + + // Then + result.isFailure shouldBe true + result.error.get.statusCode shouldBe Some(401) + result.error.get.message shouldBe "Authentication failed" + + verify(mockLogger).error(contains("Authentication failed")) + } + + "handle timeout error" in { + // Given + val error = ElasticError( + message = "Request timeout after 30s", + cause = Some(new java.net.SocketTimeoutException("Read timed out")), + statusCode = Some(504), + operation = Some("executeVersion") + ) + versionApi.executeVersionResult = ElasticFailure(error) + + // When + val result = versionApi.version + + // Then + result.isFailure shouldBe true + result.error.get.cause shouldBe defined + result.error.get.cause.get shouldBe a[java.net.SocketTimeoutException] + + verify(mockLogger).error(contains("Request timeout")) + } + + "handle network error" in { + // Given + val error = ElasticError( + message = "No route to host", + cause = Some(new java.net.NoRouteToHostException("No route to host")), + statusCode = Some(503), + operation = Some("executeVersion") + ) + versionApi.executeVersionResult = ElasticFailure(error) + + // When + val result = versionApi.version + + // Then + result.isFailure shouldBe true + result.getOrElse("default") shouldBe "default" + + verify(mockLogger).error(contains("No route to host")) + } + + "handle malformed response error" in { + // Given + val error = ElasticError( + message = "Invalid JSON 
response", + cause = + Some(new com.fasterxml.jackson.core.JsonParseException(null, "Unexpected character")), + statusCode = Some(500), + operation = Some("executeVersion") + ) + versionApi.executeVersionResult = ElasticFailure(error) + + // When + val result = versionApi.version + + // Then + result.isFailure shouldBe true + result.error.get.cause.get shouldBe a[com.fasterxml.jackson.core.JsonParseException] + } + + "support ElasticResult operations - map" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + + // When + val result = versionApi.version.map(v => s"Version: $v") + + // Then + result shouldBe ElasticSuccess("Version: 8.11.0") + result.get shouldBe "Version: 8.11.0" + } + + "support ElasticResult operations - flatMap" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + + // When + val result = versionApi.version.flatMap { v => + if (v.startsWith("8")) ElasticSuccess(s"Version $v is ES 8.x") + else ElasticFailure(ElasticError("Not ES 8.x")) + } + + // Then + result.isSuccess shouldBe true + result.get shouldBe "Version 8.11.0 is ES 8.x" + } + + "support ElasticResult operations - getOrElse" in { + // Given - Success case + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + val result1 = versionApi.version.getOrElse("unknown") + + // Given - Failure case + versionApi = new TestVersionApi() + versionApi.executeVersionResult = ElasticFailure(ElasticError("Error")) + val result2 = versionApi.version.getOrElse("unknown") + + // Then + result1 shouldBe "8.11.0" + result2 shouldBe "unknown" + } + + "support ElasticResult operations - toOption" in { + // Given - Success + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + val option1 = versionApi.version.toOption + + // Given - Failure + versionApi = new TestVersionApi() + versionApi.executeVersionResult = ElasticFailure(ElasticError("Error")) + val option2 = versionApi.version.toOption + + // Then + option1 shouldBe Some("8.11.0") + option2 shouldBe None + } + + "support ElasticResult operations - toEither" in { + // Given - Success + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + val either1 = versionApi.version.toEither + + // Given - Failure + versionApi = new TestVersionApi() + val error = ElasticError("Error") + versionApi.executeVersionResult = ElasticFailure(error) + val either2 = versionApi.version.toEither + + // Then + either1 shouldBe Right("8.11.0") + either2 shouldBe Left(error) + } + + "support ElasticResult operations - fold" in { + // Given - Success + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + val folded1 = versionApi.version.fold( + error => s"Error: ${error.message}", + version => s"Success: $version" + ) + + // Given - Failure + versionApi = new TestVersionApi() + versionApi.executeVersionResult = ElasticFailure(ElasticError("Connection failed")) + val folded2 = versionApi.version.fold( + error => s"Error: ${error.message}", + version => s"Success: $version" + ) + + // Then + folded1 shouldBe "Success: 8.11.0" + folded2 shouldBe "Error: Connection failed" + } + + "support ElasticResult operations - foreach" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + var sideEffect: Option[String] = None + + // When + versionApi.version.foreach { v => + sideEffect = Some(v) + } + + // Then + sideEffect shouldBe Some("8.11.0") + } + + "support ElasticResult operations - filter" in { + // Given - Success with filter passing + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + val 
filtered1 = versionApi.version.filter(_.startsWith("8"), "Not ES 8.x") + + // Given - Success with filter failing + versionApi = new TestVersionApi() + versionApi.executeVersionResult = ElasticSuccess("7.17.0") + val filtered2 = versionApi.version.filter(_.startsWith("8"), "Not ES 8.x") + + // Then + filtered1.isSuccess shouldBe true + filtered1.get shouldBe "8.11.0" + + filtered2.isFailure shouldBe true + filtered2.error.get.message shouldBe "Not ES 8.x" + } + + "throw exception when calling get on failure" in { + // Given + versionApi.executeVersionResult = ElasticFailure(ElasticError("Connection failed")) + + // When & Then + val exception = intercept[NoSuchElementException] { + versionApi.version.get + } + + exception.getMessage should include("Connection failed") + } + + "handle concurrent calls correctly" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + + // When - Simulate concurrent calls + val results = (1 to 10).map(_ => versionApi.version) + + // Then - All should return the same cached version + results.foreach { result => + result shouldBe ElasticSuccess("8.11.0") + } + + // Should only log once (first call) + verify(mockLogger, times(1)).info(contains("8.11.0")) + } + + "handle error with full context" in { + // Given + val error = ElasticError( + message = "Connection timeout", + cause = Some(new java.net.SocketTimeoutException()), + statusCode = Some(504), + index = Some("my-index"), + operation = Some("executeVersion") + ) + versionApi.executeVersionResult = ElasticFailure(error) + + // When + val result = versionApi.version + + // Then + result.error.get.fullMessage should include("executeVersion") + result.error.get.fullMessage should include("my-index") + result.error.get.fullMessage should include("504") + result.error.get.fullMessage should include("Connection timeout") + } + + "handle empty version string" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("") + + // When + val result = versionApi.version + + // Then + result.isSuccess shouldBe true + result.get shouldBe "" + + verify(mockLogger).info(contains("")) + } + + "not call executeVersion on subsequent calls after success" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + var callCount = 0 + + // Override to count calls + val countingApi = new VersionApi with SerializationApi { + override protected def logger: Logger = mockLogger + override private[client] def executeVersion(): ElasticResult[String] = { + callCount += 1 + ElasticSuccess("8.11.0") + } + } + + // When + countingApi.version + countingApi.version + countingApi.version + + // Then + callCount shouldBe 1 // Only called once! + } + + "call executeVersion on each call after failure" in { + // Given + var callCount = 0 + + val failingApi = new VersionApi with SerializationApi { + override protected def logger: Logger = mockLogger + override private[client] def executeVersion(): ElasticResult[String] = { + callCount += 1 + ElasticFailure(ElasticError("Always fails")) + } + } + + // When + failingApi.version + failingApi.version + failingApi.version + + // Then + callCount shouldBe 3 // Called every time! 
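+ // Failures are never memoized: each access retries executeVersion until one succeeds.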
+ } + } + + "ElasticResult integration" should { + + "work with map transformation" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + + // When + val result = versionApi.version + .map(_.split("\\.").head.toInt) + .map(_ >= 8) + + // Then + result shouldBe ElasticSuccess(true) + } + + "work with flatMap composition" in { + // Given + versionApi.executeVersionResult = ElasticSuccess("8.11.0") + + // When + val result = versionApi.version.flatMap { version => + val major = version.split("\\.").head.toInt + if (major >= 8) ElasticSuccess(s"Compatible: $version") + else ElasticFailure(ElasticError(s"Incompatible version: $version")) + } + + // Then + result.isSuccess shouldBe true + result.get shouldBe "Compatible: 8.11.0" + } + + "propagate errors through transformations" in { + // Given + versionApi.executeVersionResult = ElasticFailure(ElasticError("Network error")) + + // When + val result = versionApi.version + .map(_.toUpperCase) + .flatMap(v => ElasticSuccess(s"Version: $v")) + .filter(_.contains("8")) + + // Then + result.isFailure shouldBe true + result.error.get.message shouldBe "Network error" + } + } + } +} diff --git a/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala index bff99219..41475113 100644 --- a/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala +++ b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticClientSpec.scala @@ -1,7 +1,27 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package app.softnetwork.elastic.client import akka.actor.ActorSystem +import akka.stream.scaladsl.Sink import app.softnetwork.elastic.model.{Binary, Child, Parent, Sample} +import app.softnetwork.elastic.client.bulk._ +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticSuccess} +import app.softnetwork.elastic.client.scroll._ import app.softnetwork.elastic.persistence.query.ElasticProvider import app.softnetwork.elastic.scalatest.ElasticDockerTestKit import app.softnetwork.elastic.sql.query.SQLQuery @@ -19,9 +39,10 @@ import _root_.java.nio.file.{Files, Paths} import _root_.java.time.format.DateTimeFormatter import _root_.java.util.UUID import _root_.java.util.concurrent.TimeUnit +import java.time.temporal.Temporal import java.time.{LocalDate, LocalDateTime, ZoneOffset} import scala.concurrent.duration.Duration -import scala.concurrent.{Await, ExecutionContextExecutor} +import scala.concurrent.{Await, ExecutionContextExecutor, Future} import scala.util.{Failure, Success, Try} /** Created by smanciot on 28/06/2018. 
@@ -34,10 +55,20 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M implicit val executionContext: ExecutionContextExecutor = system.dispatcher - def pClient: ElasticProvider[Person] with ElasticClientApi - def sClient: ElasticProvider[Sample] with ElasticClientApi - def bClient: ElasticProvider[Binary] with ElasticClientApi - def parentClient: ElasticProvider[Parent] with ElasticClientApi + import ElasticProviders._ + + lazy val pClient: ElasticProvider[Person] = new PersonProvider( + elasticConfig + ) + lazy val sClient: ElasticProvider[Sample] = new SampleProvider( + elasticConfig + ) + lazy val bClient: ElasticProvider[Binary] = new BinaryProvider( + elasticConfig + ) + lazy val parentClient: ElasticProvider[Parent] = new ParentProvider( + elasticConfig + ) import scala.language.implicitConversions @@ -80,7 +111,14 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "Adding an alias and then removing it" should "work" in { pClient.addAlias("person", "person_alias") - doesAliasExists("person_alias") shouldBe true + pClient.aliasExists("person_alias").get shouldBe true + + pClient.getAliases("person") match { + case ElasticSuccess(aliases) => + aliases should contain("person_alias") + case ElasticFailure(elasticError) => + fail(elasticError.fullMessage) + } pClient.removeAlias("person", "person_alias") @@ -88,16 +126,18 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M } "Toggle refresh" should "work" in { - pClient.toggleRefresh("person", enable = false) + pClient.toggleRefresh("person", enable = false).get shouldBe true + var settings = pClient.loadSettings("person").get new JsonParser() - .parse(pClient.loadSettings("person")) + .parse(settings) .getAsJsonObject .get("refresh_interval") .getAsString shouldBe "-1" - pClient.toggleRefresh("person", enable = true) + pClient.toggleRefresh("person", enable = true).get shouldBe true + settings = pClient.loadSettings("person").get new JsonParser() - .parse(pClient.loadSettings("person")) + .parse(settings) .getAsJsonObject .get("refresh_interval") .getAsString shouldBe "1s" @@ -116,14 +156,14 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M pClient.setReplicas("person", 3) new JsonParser() - .parse(pClient.loadSettings("person")) + .parse(pClient.loadSettings("person").get) .getAsJsonObject .get("number_of_replicas") .getAsString shouldBe "3" pClient.setReplicas("person", 0) new JsonParser() - .parse(pClient.loadSettings("person")) + .parse(pClient.loadSettings("person").get) .getAsJsonObject .get("number_of_replicas") .getAsString shouldBe "0" @@ -159,17 +199,20 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M | } | } |}""".stripMargin.replaceAll("\n", "").replaceAll("\\s+", "") - pClient.setMapping("person_mapping", mapping) shouldBe true + pClient.setMapping("person_mapping", mapping).get shouldBe true - val properties = pClient.getMappingProperties("person_mapping") + val properties = pClient.getMappingProperties("person_mapping").get log.info(s"properties: $properties") MappingComparator.isMappingDifferent( properties, mapping ) shouldBe false - implicit val bulkOptions: BulkOptions = BulkOptions("person_mapping", "_doc", 1000) - val indices = pClient.bulk[String](persons.iterator, identity, Some("uuid"), None, None) + implicit val bulkOptions: BulkOptions = BulkOptions("person_mapping") + val result = pClient.bulk[String](persons.iterator, identity, idKey = 
Some("uuid")).get + result.failedCount shouldBe 0 + result.successCount shouldBe persons.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true pClient.flush("person_mapping") @@ -179,33 +222,45 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person_mapping" should haveCount(3) - pClient.search[Person]("select * from person_mapping") match { - case r if r.size == 3 => - r.map(_.uuid) should contain allOf ("A12", "A14", "A16") - case other => fail(other.toString) + pClient.searchAs[Person]("select * from person_mapping") match { + case ElasticSuccess(value) => + value match { + case r if r.size == 3 => + r.map(_.uuid) should contain allOf ("A12", "A14", "A16") + case other => fail(other.toString) + } + case ElasticFailure(elasticError) => + fail(elasticError.fullMessage) } - pClient.search[Person]("select * from person_mapping where uuid = 'A16'") match { + pClient.searchAs[Person]("select * from person_mapping where uuid = 'A16'").get match { case r if r.size == 1 => r.map(_.uuid) should contain only "A16" case other => fail(other.toString) } - pClient.search[Person]("select * from person_mapping where match (name) against ('gum')") match { + pClient + .searchAs[Person]( + "select * from person_mapping where match (name) against ('gum')" + ) + .get match { case r if r.size == 1 => r.map(_.uuid) should contain only "A16" case other => fail(other.toString) } - pClient.search[Person]( - "select * from person_mapping where uuid <> 'A16' and match (name) against ('gum')" - ) match { + pClient + .searchAs[Person]( + "select * from person_mapping where uuid <> 'A16' and match (name) against ('gum')" + ) + .get match { case r if r.isEmpty => case other => fail(other.toString) } } "Updating a mapping" should "work" in { + pClient.createIndex("person_migration").get shouldBe true val mapping = """{ | "properties": { @@ -224,12 +279,15 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M | } |} """.stripMargin.replaceAll("\n", "").replaceAll("\\s+", "") - pClient.updateMapping("person_migration", mapping) shouldBe true + pClient.setMapping("person_migration", mapping).get shouldBe true blockUntilIndexExists("person_migration") "person_migration" should beCreated() - implicit val bulkOptions: BulkOptions = BulkOptions("person_migration", "_doc", 1000) - val indices = pClient.bulk[String](persons.iterator, identity, Some("uuid"), None, None) + implicit val bulkOptions: BulkOptions = BulkOptions("person_migration") + val result = pClient.bulk[String](persons.iterator, identity, idKey = Some("uuid")).get + result.failedCount shouldBe 0 + result.successCount shouldBe persons.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true pClient.flush("person_migration") @@ -239,7 +297,11 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person_migration" should haveCount(3) - pClient.search[Person]("select * from person_migration where match (name) against ('gum')") match { + pClient + .searchAs[Person]( + "select * from person_migration where match (name) against ('gum')" + ) + .get match { case r if r.isEmpty => case other => fail(other.toString) } @@ -285,10 +347,14 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M | } |} """.stripMargin.replaceAll("\n", "").replaceAll("\\s+", "") - pClient.shouldUpdateMapping("person_migration", 
newMapping) shouldBe true - pClient.updateMapping("person_migration", newMapping) shouldBe true + pClient.shouldUpdateMapping("person_migration", newMapping).get shouldBe true + pClient.updateMapping("person_migration", newMapping).get shouldBe true - pClient.search[Person]("select * from person_migration where match (name) against ('gum')") match { + pClient + .searchAs[Person]( + "select * from person_migration where match (name) against ('gum')" + ) + .get match { case r if r.size == 1 => r.map(_.uuid) should contain only "A16" case other => fail(other.toString) @@ -297,8 +363,12 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M } "Bulk index valid json without id key and suffix key" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person1", "person", 2) - val indices = pClient.bulk[String](persons.iterator, identity, None, None, None) + implicit val bulkOptions: BulkOptions = + BulkOptions("person1", "person", 2) // small chunk size to test multiple bulk requests + val result = pClient.bulk[String](persons.iterator, identity).get + result.failedCount shouldBe 0 + result.successCount shouldBe persons.size + val indices = result.indices indices should contain only "person1" @@ -306,7 +376,7 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person1" should haveCount(3) - pClient.search[Person]("select * from person1") match { + pClient.searchAs[Person]("select * from person1").get match { case r if r.size == 3 => r.map(_.uuid) should contain allOf ("A12", "A14", "A16") r.map(_.name) should contain allOf ("Homer Simpson", "Moe Szyslak", "Barney Gumble") @@ -316,10 +386,13 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M } "Bulk index valid json with an id key but no suffix key" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person2", "person", 1000) - val indices = pClient.bulk[String](persons.iterator, identity, Some("uuid"), None, None) + implicit val bulkOptions: BulkOptions = BulkOptions("person2") + val result = pClient.bulk[String](persons.iterator, identity, idKey = Some("uuid")).get + result.failedCount shouldBe 0 + result.successCount shouldBe persons.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true - pClient.flush("person2") + pClient.flush("person2").get shouldBe true indices should contain only "person2" @@ -327,7 +400,7 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person2" should haveCount(3) - pClient.search[Person]("select * from person2") match { + pClient.searchAs[Person]("select * from person2").get match { case r if r.size == 3 => r.map(_.uuid) should contain allOf ("A12", "A14", "A16") r.map(_.name) should contain allOf ("Homer Simpson", "Moe Szyslak", "Barney Gumble") @@ -345,9 +418,19 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M } "Bulk index valid json with an id key and a suffix key" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person", "person", 1000) - val indices = - pClient.bulk[String](persons.iterator, identity, Some("uuid"), Some("birthDate"), None, None) + implicit val bulkOptions: BulkOptions = BulkOptions("person", "person") + val result = + pClient + .bulk[String]( + persons.iterator, + identity, + idKey = Some("uuid"), + suffixDateKey = Some("birthDate") + ) + .get + result.failedCount shouldBe 0 + result.successCount 
shouldBe persons.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true indices should contain allOf ("person-1967-11-21", "person-1969-05-09") @@ -358,7 +441,7 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person-1967-11-21" should haveCount(2) "person-1969-05-09" should haveCount(1) - pClient.search[Person]("select * from person-1967-11-21, person-1969-05-09") match { + pClient.searchAs[Person]("select * from person-1967-11-21, person-1969-05-09").get match { case r if r.size == 3 => r.map(_.uuid) should contain allOf ("A12", "A14", "A16") r.map(_.name) should contain allOf ("Homer Simpson", "Moe Szyslak", "Barney Gumble") @@ -376,18 +459,27 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M } "Bulk index invalid json with an id key and a suffix key" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person_error", "person", 1000) + implicit val bulkOptions: BulkOptions = BulkOptions("person_error") intercept[JsonParseException] { val invalidJson = persons :+ "fail" - pClient.bulk[String](invalidJson.iterator, identity, None, None, None) + pClient.bulk[String](invalidJson.iterator, identity).get } } "Bulk upsert valid json with an id key but no suffix key" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person4", "person", 1000) - val indices = + implicit val bulkOptions: BulkOptions = BulkOptions("person4") + val result = pClient - .bulk[String](personsWithUpsert.iterator, identity, Some("uuid"), None, None, Some(true)) + .bulk[String]( + personsWithUpsert.iterator, + identity, + idKey = Some("uuid"), + update = Some(true) + ) + .get + result.failedCount shouldBe 0 + result.successCount > 0 shouldBe true //personsWithUpsert.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true indices should contain only "person4" @@ -396,7 +488,7 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person4" should haveCount(3) - pClient.search[Person]("select * from person4") match { + pClient.searchAs[Person]("select * from person4").get match { case r if r.size == 3 => r.map(_.uuid) should contain allOf ("A12", "A14", "A16") r.map(_.name) should contain allOf ("Homer Simpson", "Moe Szyslak", "Barney Gumble2") @@ -414,15 +506,20 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M } "Bulk upsert valid json with an id key and a suffix key" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person5", "person", 1000) - val indices = pClient.bulk[String]( - personsWithUpsert.iterator, - identity, - Some("uuid"), - Some("birthDate"), - None, - Some(true) - ) + pClient.createIndex("person5").get shouldBe true + implicit val bulkOptions: BulkOptions = BulkOptions("person5") + val result = pClient + .bulk[String]( + personsWithUpsert.iterator, + identity, + idKey = Some("uuid"), + suffixDateKey = Some("birthDate"), + update = Some(true) + ) + .get + result.failedCount shouldBe 0 + result.successCount > 0 shouldBe true // personsWithUpsert.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true indices should contain allOf ("person5-1967-11-21", "person5-1969-05-09") @@ -433,7 +530,7 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person5-1967-11-21" should 
haveCount(2) "person5-1969-05-09" should haveCount(1) - pClient.search[Person]("select * from person5-1967-11-21, person5-1969-05-09") match { + pClient.searchAs[Person]("select * from person5-1967-11-21, person5-1969-05-09").get match { case r if r.size == 3 => r.map(_.uuid) should contain allOf ("A12", "A14", "A16") r.map(_.name) should contain allOf ("Homer Simpson", "Moe Szyslak", "Barney Gumble2") @@ -451,10 +548,19 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M } "Count" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person6", "person", 1000) - val indices = + implicit val bulkOptions: BulkOptions = BulkOptions("person6") + val result = pClient - .bulk[String](personsWithUpsert.iterator, identity, Some("uuid"), None, None, Some(true)) + .bulk[String]( + personsWithUpsert.iterator, + identity, + idKey = Some("uuid"), + update = Some(true) + ) + .get + result.failedCount shouldBe 0 + result.successCount > 0 shouldBe true //personsWithUpsert.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true indices should contain only "person6" @@ -466,21 +572,31 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M import scala.collection.immutable._ pClient - .count(JSONQuery("{}", Seq[String]("person6"), Seq[String]())) + .count(ElasticQuery("{}", Seq[String]("person6"), Seq[String]())) + .get .getOrElse(0d) .toInt should ===(3) - pClient.countAsync(JSONQuery("{}", Seq[String]("person6"), Seq[String]())).complete() match { - case Success(s) => s.getOrElse(0d).toInt should ===(3) + pClient.countAsync(ElasticQuery("{}", Seq[String]("person6"), Seq[String]())).complete() match { + case Success(s) => s.get.getOrElse(0d).toInt should ===(3) case Failure(f) => fail(f.getMessage) } } "Search" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person7", "person", 1000) - val indices = + implicit val bulkOptions: BulkOptions = BulkOptions("person7") + val result = pClient - .bulk[String](personsWithUpsert.iterator, identity, Some("uuid"), None, None, Some(true)) + .bulk[String]( + personsWithUpsert.iterator, + identity, + idKey = Some("uuid"), + update = Some(true) + ) + .get + result.failedCount shouldBe 0 + result.successCount > 0 shouldBe true //personsWithUpsert.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true indices should contain only "person7" @@ -489,23 +605,25 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person7" should haveCount(3) - val r1 = pClient.search[Person]("select * from person7") + val r1 = pClient.searchAs[Person]("select * from person7").get r1.size should ===(3) r1.map(_.uuid) should contain allOf ("A12", "A14", "A16") - pClient.searchAsync[Person]("select * from person7") onComplete { - case Success(r) => + pClient.searchAsyncAs[Person]("select * from person7") onComplete { + case Success(s) => + val r = s.get r.size should ===(3) r.map(_.uuid) should contain allOf ("A12", "A14", "A16") case Failure(f) => fail(f.getMessage) } - val r2 = pClient.search[Person]("select * from person7 where _id=\"A16\"") + val r2 = pClient.searchAs[Person]("select * from person7 where _id=\"A16\"").get r2.size should ===(1) r2.map(_.uuid) should contain("A16") - pClient.searchAsync[Person]("select * from person7 where _id=\"A16\"") onComplete { - case Success(r) => + pClient.searchAsyncAs[Person]("select 
* from person7 where _id=\"A16\"") onComplete { + case Success(s) => + val r = s.get r.size should ===(1) r.map(_.uuid) should contain("A16") case Failure(f) => fail(f.getMessage) @@ -513,10 +631,19 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M } "Get all" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person8", "person", 1000) - val indices = + implicit val bulkOptions: BulkOptions = BulkOptions("person8") + val result = pClient - .bulk[String](personsWithUpsert.iterator, identity, Some("uuid"), None, None, Some(true)) + .bulk[String]( + personsWithUpsert.iterator, + identity, + idKey = Some("uuid"), + update = Some(true) + ) + .get + result.failedCount shouldBe 0 + result.successCount > 0 shouldBe true //personsWithUpsert.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true indices should contain only "person8" @@ -525,17 +652,26 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person8" should haveCount(3) - val response = pClient.search[Person]("select * from person8") + val response = pClient.searchAs[Person]("select * from person8").get response.size should ===(3) } "Get" should "work" in { - implicit val bulkOptions: BulkOptions = BulkOptions("person9", "person", 1000) - val indices = + implicit val bulkOptions: BulkOptions = BulkOptions("person9") + val result = pClient - .bulk[String](personsWithUpsert.iterator, identity, Some("uuid"), None, None, Some(true)) + .bulk[String]( + personsWithUpsert.iterator, + identity, + idKey = Some("uuid"), + update = Some(true) + ) + .get + result.failedCount shouldBe 0 + result.successCount > 0 shouldBe true //personsWithUpsert.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true indices should contain only "person9" @@ -544,13 +680,14 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person9" should haveCount(3) - val response = pClient.get[Person]("A16", Some("person9")) + val response = pClient.getAs[Person]("A16", Some("person9")).get response.isDefined shouldBe true response.get.uuid shouldBe "A16" - pClient.getAsync[Person]("A16", Some("person9")).complete() match { - case Success(r) => + pClient.getAsyncAs[Person]("A16", Some("person9")).complete() match { + case Success(s) => + val r = s.get r.isDefined shouldBe true r.get.uuid shouldBe "A16" case Failure(f) => fail(f.getMessage) @@ -560,15 +697,15 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "Index" should "work" in { val uuid = UUID.randomUUID().toString val sample = Sample(uuid) - val result = sClient.index(sample) + val result = sClient.indexAs(sample, uuid).get result shouldBe true - sClient.indexAsync(sample).complete() match { - case Success(r) => r shouldBe true + sClient.indexAsyncAs(sample, uuid).complete() match { + case Success(r) => r.get shouldBe true case Failure(f) => fail(f.getMessage) } - val result2 = sClient.get[Sample](uuid) + val result2 = sClient.getAs[Sample](uuid).get result2 match { case Some(r) => r.uuid shouldBe uuid @@ -580,15 +717,15 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "Update" should "work" in { val uuid = UUID.randomUUID().toString val sample = Sample(uuid) - val result = sClient.update(sample) + val result = sClient.updateAs(sample, uuid).get result shouldBe true - 
sClient.updateAsync(sample).complete() match { - case Success(r) => r shouldBe true + sClient.updateAsyncAs(sample, uuid).complete() match { + case Success(r) => r.get shouldBe true case Failure(f) => fail(f.getMessage) } - val result2 = sClient.get[Sample](uuid) + val result2 = sClient.getAs[Sample](uuid).get result2 match { case Some(r) => r.uuid shouldBe uuid @@ -600,38 +737,38 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "Delete" should "work" in { val uuid = UUID.randomUUID().toString val index = s"sample-$uuid" - sClient.createIndex(index) shouldBe true + sClient.createIndex(index).get shouldBe true val sample = Sample(uuid) - val result = sClient.index(sample, Some(index)) + val result = sClient.indexAs(sample, uuid, Some(index)).get result shouldBe true - sClient.delete(sample.uuid, index) shouldBe true + sClient.delete(sample.uuid, index).get shouldBe true //blockUntilEmpty(index) - sClient.get[Sample](uuid).isEmpty shouldBe true + sClient.getAs[Sample](uuid).isFailure shouldBe true // 404 } "Delete asynchronously" should "work" in { val uuid = UUID.randomUUID().toString val index = s"sample-$uuid" - sClient.createIndex(index) shouldBe true + sClient.createIndex(index).get shouldBe true val sample = Sample(uuid) - val result = sClient.index(sample, Some(index)) + val result = sClient.indexAs(sample, uuid, Some(index)).get result shouldBe true sClient.deleteAsync(sample.uuid, index).complete() match { - case Success(r) => r shouldBe true + case Success(r) => r.get shouldBe true case Failure(f) => fail(f.getMessage) } // blockUntilEmpty(index) - sClient.get[Sample](uuid).isEmpty shouldBe true + sClient.getAs[Sample](uuid).isFailure shouldBe true // 404 } "Index binary data" should "work" in { - bClient.createIndex("binaries") shouldBe true + bClient.createIndex("binaries").get shouldBe true val mapping = """{ | "properties": { @@ -653,8 +790,8 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M | } |} """.stripMargin.replaceAll("\n", "").replaceAll("\\s+", "") - bClient.setMapping("binaries", mapping) shouldBe true - bClient.shouldUpdateMapping("binaries", mapping) shouldBe false + bClient.setMapping("binaries", mapping).get shouldBe true + bClient.shouldUpdateMapping("binaries", mapping).get shouldBe false for (uuid <- Seq("png", "jpg", "pdf")) { Try( Paths.get(Thread.currentThread().getContextClassLoader.getResource(s"avatar.$uuid").getPath) @@ -669,8 +806,8 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M content = encoded, md5 = hashStream(new ByteArrayInputStream(decodeBase64(encoded))).getOrElse("") ) - bClient.index(binary) shouldBe true - bClient.get[Binary](uuid) match { + bClient.indexAs(binary, uuid).get shouldBe true + bClient.getAs[Binary](uuid).get match { case Some(result) => val decoded = decodeBase64(result.content) val out = Paths.get(s"/tmp/${path.getFileName}") @@ -687,7 +824,7 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M } "Aggregations" should "work" in { - pClient.createIndex("person10") shouldBe true + pClient.createIndex("person10").get shouldBe true val mapping = """{ | "properties": { @@ -719,14 +856,23 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M |} """.stripMargin.replaceAll("\n", "").replaceAll("\\s+", "") log.info(s"mapping: $mapping") - pClient.setMapping("person10", mapping) shouldBe true + pClient.setMapping("person10", mapping).get shouldBe true - implicit val 
bulkOptions: BulkOptions = BulkOptions("person10", "_doc", 1000) - val indices = + implicit val bulkOptions: BulkOptions = BulkOptions("person10") + val result = pClient - .bulk[String](personsWithUpsert.iterator, identity, Some("uuid"), None, None, Some(true)) + .bulk[String]( + personsWithUpsert.iterator, + identity, + idKey = Some("uuid"), + update = Some(true) + ) + .get + result.failedCount shouldBe 0 + result.successCount > 0 shouldBe true //personsWithUpsert.size + val indices = result.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true - pClient.flush("person10") + pClient.flush("person10").get shouldBe true indices should contain only "person10" @@ -734,7 +880,7 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "person10" should haveCount(3) - pClient.get[Person]("A16", Some("person10")) match { + pClient.getAs[Person]("A16", Some("person10")).get match { case Some(p) => p.uuid shouldBe "A16" p.birthDate shouldBe "1969-05-09" @@ -747,13 +893,15 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "select count(distinct p.uuid) as c from person10 p" ) .complete() match { - case Success(s) => s.headOption.flatMap(_.asDoubleOption).getOrElse(0d) should ===(3d) + case Success(s) => + s.get.headOption.flatMap(_.asDoubleSafe.toOption).getOrElse(0d) should ===(3d) case Failure(f) => fail(f.getMessage) } // test count aggregation pClient.aggregate("select count(p.uuid) as c from person10 p").complete() match { - case Success(s) => s.headOption.flatMap(_.asDoubleOption).getOrElse(0d) should ===(3d) + case Success(s) => + s.get.headOption.flatMap(_.asDoubleSafe.toOption).getOrElse(0d) should ===(3d) case Failure(f) => fail(f.getMessage) } @@ -761,13 +909,17 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M pClient.aggregate("select max(p.birthDate) as c from person10 p").complete() match { case Success(s) => // The maximum date should be the latest birthDate in the dataset - s.headOption match { - case Some(value) if value.isDouble => - value.asDoubleOption.getOrElse(0d) should ===( - LocalDate.parse("1969-05-09").toEpochDay.toDouble * 3600 * 24 * 1000 - ) - case Some(value) if value.isString => - value.asStringOption.getOrElse("") should ===("1969-05-09T00:00:00.000Z") + s.get.headOption match { + case Some(value) => + value.asDoubleSafe.orElse(value.asStringSafe).toOption match { + case Some(d: Double) => + d should ===( + LocalDate.parse("1969-05-09").toEpochDay.toDouble * 3600 * 24 * 1000 + ) + case Some(s: String) => + s should ===("1969-05-09T00:00:00.000Z") + case _ => fail(s"Unexpected value type: ${value.prettyPrint}") + } case None => fail("No result found for max aggregation") } case Failure(f) => fail(f.getMessage) @@ -777,13 +929,17 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M pClient.aggregate("select min(p.birthDate) as c from person10 p").complete() match { case Success(s) => // The minimum date should be the earliest birthDate in the dataset - s.headOption match { - case Some(value) if value.isDouble => - value.asDoubleOption.getOrElse(0d) should ===( - LocalDate.parse("1967-11-21").toEpochDay.toDouble * 3600 * 24 * 1000 - ) - case Some(value) if value.isString => - value.asStringOption.getOrElse("") should ===("1967-11-21T00:00:00.000Z") + s.get.headOption match { + case Some(value) => + value.asDoubleSafe.orElse(value.asStringSafe).toOption match { + case Some(d: Double) => + d should ===( + 
LocalDate.parse("1967-11-21").toEpochDay.toDouble * 3600 * 24 * 1000 + ) + case Some(s: String) => + s should ===("1967-11-21T00:00:00.000Z") + case _ => fail(s"Unexpected value type: ${value.prettyPrint}") + } case None => fail("No result found for min aggregation") } case Failure(f) => fail(f.getMessage) @@ -793,16 +949,21 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M pClient.aggregate("select avg(p.birthDate) as c from person10 p").complete() match { case Success(s) => // The average date should be the midpoint between the min and max dates - s.headOption match { - case Some(value) if value.isDouble => - value.asDoubleOption.getOrElse(0d) should ===( - LocalDateTime - .parse("1968-05-17T08:00:00.000Z", DateTimeFormatter.ISO_OFFSET_DATE_TIME) - .toInstant(ZoneOffset.UTC) - .toEpochMilli - ) - case Some(value) if value.isString => - value.asStringOption.getOrElse("") should ===("1968-05-17T08:00:00.000Z") + s.get.headOption match { + case Some(value) => + value.asDoubleSafe.orElse(value.asStringSafe).toOption match { + case Some(d: Double) => + d should ===( + LocalDateTime + .parse("1968-05-17T08:00:00.000Z", DateTimeFormatter.ISO_OFFSET_DATE_TIME) + .toInstant(ZoneOffset.UTC) + .toEpochMilli + .toDouble + ) + case Some(s: String) => + s should ===("1968-05-17T08:00:00.000Z") + case _ => fail(s"Unexpected value type: ${value.prettyPrint}") + } case None => fail("No result found for avg aggregation") } case Failure(f) => fail(f.getMessage) @@ -815,14 +976,116 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M ) .complete() match { case Success(s) => - s.headOption.flatMap(_.asDoubleOption).getOrElse(0d) should ===(2d) + s.get.headOption.flatMap(_.asDoubleSafe.toOption).getOrElse(0d) should ===(2d) + case Failure(f) => fail(f.getMessage) + } + + // test first aggregation on date field + pClient.aggregate("select first(p.birthDate) as c from person10 p").complete() match { + case Success(s) => + s.get.headOption match { + case Some(value) => + value.asDoubleSafe.orElse(value.asStringSafe).orElse(value.asMapSafe).toOption match { + case Some(d: Double) => + d should ===( + LocalDate.parse("1967-11-21").toEpochDay.toDouble * 3600 * 24 * 1000 + ) + case Some(s: String) => + s should ===("1967-11-21T00:00:00.000Z") + case Some(m: Map[_, _]) => + // Elasticsearch 7.14+ returns an object for first/last aggregations on date fields + m.asInstanceOf[Map[String, Any]].get("birthDate") match { + case Some(t: Temporal) => + t should ===(LocalDate.parse("1967-11-21")) + case Some(d: Double) => + d should ===( + LocalDate + .parse("1967-11-21") + .toEpochDay + .toDouble * 3600 * 24 * 1000 + ) + case Some(s: String) => + s should ===("1967-11-21T00:00:00.000Z") + case other => fail(s"Unexpected value type: $other") + } + } + case None => fail("No result found for first aggregation") + } + case Failure(f) => fail(f.getMessage) + } + + // test last aggregation on date field + pClient.aggregate("select last(p.birthDate) as c from person10 p").complete() match { + case Success(s) => + s.get.headOption match { + case Some(value) => + value.asDoubleSafe.orElse(value.asStringSafe).orElse(value.asMapSafe).toOption match { + case Some(d: Double) => + d should ===( + LocalDate.parse("1969-05-09").toEpochDay.toDouble * 3600 * 24 * 1000 + ) + case Some(s: String) => + s should ===("1969-05-09T00:00:00.000Z") + case Some(m: Map[_, _]) => + // Elasticsearch 7.14+ returns an object for first/last aggregations on date fields + 
m.asInstanceOf[Map[String, Any]].get("birthDate") match { + case Some(t: Temporal) => + t should ===(LocalDate.parse("1969-05-09")) + case Some(d: Double) => + d should ===( + LocalDate + .parse("1969-05-09") + .toEpochDay + .toDouble * 3600 * 24 * 1000 + ) + case Some(s: String) => + s should ===("1969-05-09T00:00:00.000Z") + case other => fail(s"Unexpected value type: $other") + } + } + case None => fail("No result found for last aggregation") + } + case Failure(f) => fail(f.getMessage) + } + + // test array aggregation on String field + pClient + .aggregate( + "select array(child.name) as names from person10 p JOIN UNNEST(p.children) as child LIMIT 10" + ) + .complete() match { + case Success(s) => + val names = s.get.headOption.flatMap(_.asSeqSafe.toOption).getOrElse(Seq.empty).map { + case s: String => s + case other => fail(s"Unexpected name type: $other") + } + names should contain allOf ("Josh Gumble", "Steve Gumble") case Failure(f) => fail(f.getMessage) } + // test array aggregation on date field + pClient + .aggregate( + "select array(DISTINCT child.birthDate) as birthDates from person10 p JOIN UNNEST(p.children) as child LIMIT 10" + ) + .complete() match { + case Success(s) => + val birthDates = s.get.headOption + .flatMap(_.asSeqSafe.toOption) + .getOrElse(Seq.empty) + .map { + case t: Temporal => t + case other => fail(s"Unexpected birthDate type: $other") + } + .map(_.toString) + birthDates.nonEmpty shouldBe true + birthDates should contain allOf ("1999-05-09", "2002-05-09") // LocalDate instances sorted ASC + case Failure(f) => fail(f.getMessage) + } } "Nested queries" should "work" in { - parentClient.createIndex("parent") shouldBe true + parentClient.createIndex("parent").get shouldBe true val mapping = """{ | "properties": { @@ -862,15 +1125,24 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M |} """.stripMargin.replaceAll("\n", "").replaceAll("\\s+", "") log.info(s"mapping: $mapping") - parentClient.setMapping("parent", mapping) shouldBe true + parentClient.setMapping("parent", mapping).get shouldBe true - implicit val bulkOptions: BulkOptions = BulkOptions("parent", "_doc", 1000) - val indices = + implicit val bulkOptions: BulkOptions = BulkOptions("parent") + val bulkResult = parentClient - .bulk[String](personsWithUpsert.iterator, identity, Some("uuid"), None, None, Some(true)) + .bulk[String]( + personsWithUpsert.iterator, + identity, + idKey = Some("uuid"), + update = Some(true) + ) + .get + bulkResult.failedCount shouldBe 0 + bulkResult.successCount > 0 shouldBe true //personsWithUpsert.size + val indices = bulkResult.indices indices.forall(index => refresh(index).getStatusLine.getStatusCode < 400) shouldBe true - parentClient.flush("parent") - parentClient.refresh("parent") + parentClient.flush("parent").get shouldBe true + parentClient.refresh("parent").get shouldBe true indices should contain only "parent" @@ -878,26 +1150,28 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M "parent" should haveCount(3) - val parents = parentClient.search[Parent]("select * from parent") - assert(parents.size == 3) - - val results = parentClient.searchWithInnerHits[Parent, Child]( - """SELECT - | p.uuid, - | p.name, - | p.birthDate, - | p.children, - | inner_children.name, - | inner_children.birthDate, - | inner_children.parentId - | FROM - | parent as p - | JOIN UNNEST(p.children) as inner_children - |WHERE - | inner_children.name is not null AND p.uuid = 'A16' - |""".stripMargin, - "inner_children" - ) + val 
parents = parentClient.searchAs[Parent]("select * from parent") + parents.get.size shouldBe 3 + + val results = parentClient + .searchWithInnerHits[Parent, Child]( + """SELECT + | p.uuid, + | p.name, + | p.birthDate, + | p.children, + | inner_children.name, + | inner_children.birthDate, + | inner_children.parentId + | FROM + | parent as p + | JOIN UNNEST(p.children) as inner_children + |WHERE + | inner_children.name is not null AND p.uuid = 'A16' + |""".stripMargin, + "inner_children" + ) + .get results.size shouldBe 1 val result = results.head result._1.uuid shouldBe "A16" @@ -908,5 +1182,47 @@ trait ElasticClientSpec extends AnyFlatSpecLike with ElasticDockerTestKit with M _.birthDate.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")) ) should contain allOf ("1999-05-09", "2002-05-09") result._2.map(_.parentId) should contain only "A16" + + val query = + """SELECT + | p.uuid, + | p.name, + | p.birthDate, + | children.name, + | children.birthDate, + | children.parentId + | FROM + | parent as p + | JOIN UNNEST(p.children) as children + |WHERE + | children.name is not null AND p.uuid = 'A16' + |""".stripMargin + + val searchResults = parentClient.searchAs[Parent](query).get + searchResults.size shouldBe 1 + val searchResult = searchResults.head + searchResult.uuid shouldBe "A16" + searchResult.children.size shouldBe 2 + searchResult.children.map(_.name) should contain allOf ("Steve Gumble", "Josh Gumble") + searchResult.children.map( + _.birthDate.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")) + ) should contain allOf ("1999-05-09", "2002-05-09") + searchResult.children.map(_.parentId) should contain only "A16" + + val scrollResults: Future[Seq[(Parent, ScrollMetrics)]] = parentClient + .scrollAs[Parent](query, config = ScrollConfig(logEvery = 1)) + .runWith(Sink.seq) + scrollResults await { rows => + val parents = rows.map(_._1) + parents.size shouldBe 1 + val scrollResult = parents.head + scrollResult.uuid shouldBe "A16" + scrollResult.children.size shouldBe 2 + scrollResult.children.map(_.name) should contain allOf ("Steve Gumble", "Josh Gumble") + scrollResult.children.map( + _.birthDate.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")) + ) should contain allOf ("1999-05-09", "2002-05-09") + scrollResult.children.map(_.parentId) should contain only "A16" + } } } diff --git a/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticProviders.scala b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticProviders.scala new file mode 100644 index 00000000..302851ea --- /dev/null +++ b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticProviders.scala @@ -0,0 +1,38 @@ +package app.softnetwork.elastic.client + +import app.softnetwork.elastic.model.{Binary, Parent, Sample} +import app.softnetwork.elastic.persistence.query.ElasticProvider +import app.softnetwork.persistence.ManifestWrapper +import app.softnetwork.persistence.person.model.Person +import com.typesafe.config.Config + +object ElasticProviders { + + class PersonProvider(conf: Config) extends ElasticProvider[Person] with ManifestWrapper[Person] { + override protected val manifestWrapper: ManifestW = ManifestW() + + override lazy val config: Config = conf + + } + + class SampleProvider(conf: Config) extends ElasticProvider[Sample] with ManifestWrapper[Sample] { + override protected val manifestWrapper: ManifestW = ManifestW() + + override lazy val config: Config = conf + + } + + class BinaryProvider(conf: Config) extends ElasticProvider[Binary] with ManifestWrapper[Binary] { + override protected 
val manifestWrapper: ManifestW = ManifestW() + + override lazy val config: Config = conf + + } + + class ParentProvider(conf: Config) extends ElasticProvider[Parent] with ManifestWrapper[Parent] { + override protected val manifestWrapper: ManifestW = ManifestW() + + override lazy val config: Config = conf + + } +} diff --git a/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticRestClientTestKit.scala b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticRestClientTestKit.scala index 1dfcfb1d..c2ccb919 100644 --- a/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticRestClientTestKit.scala +++ b/core/testkit/src/main/scala/app/softnetwork/elastic/client/ElasticRestClientTestKit.scala @@ -123,7 +123,6 @@ trait ElasticRestClientTestKit extends CompletionTestKit { _: { def log: Logger request.setEntity(new NStringEntity(write(payload), ContentType.APPLICATION_JSON)) val response = restClient.performRequest(request) val json = EntityUtils.toString(response.getEntity) - Console.err.println(s"Search response: $json") parse(json) } diff --git a/core/testkit/src/main/scala/app/softnetwork/elastic/client/MockElasticClientApi.scala b/core/testkit/src/main/scala/app/softnetwork/elastic/client/MockElasticClientApi.scala index 5a8fee1a..9270f542 100644 --- a/core/testkit/src/main/scala/app/softnetwork/elastic/client/MockElasticClientApi.scala +++ b/core/testkit/src/main/scala/app/softnetwork/elastic/client/MockElasticClientApi.scala @@ -1,11 +1,30 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package app.softnetwork.elastic.client import akka.NotUsed import akka.actor.ActorSystem -import akka.stream.scaladsl.Flow -import app.softnetwork.elastic.sql.query.{SQLQuery, SQLSearchRequest} +import akka.stream.scaladsl.{Flow, Source} +import app.softnetwork.elastic.client.bulk._ +import app.softnetwork.elastic.client.result.ElasticResult +import app.softnetwork.elastic.client.scroll._ +import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLQuery, SQLSearchRequest} +import app.softnetwork.serialization._ import org.json4s.Formats -import app.softnetwork.persistence.model.Timestamped import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.{ExecutionContext, Future} @@ -16,186 +35,381 @@ import scala.reflect.ClassTag */ trait MockElasticClientApi extends ElasticClientApi { + def elasticVersion: String + protected lazy val logger: Logger = LoggerFactory getLogger getClass.getName - implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = - """{ - | "query": { - | "match_all": {} - | }, - |}""".stripMargin + implicit def formats: Formats = commonFormats protected val elasticDocuments: ElasticDocuments = new ElasticDocuments() {} - override def toggleRefresh(index: String, enable: Boolean): Boolean = true + private def allDocumentsAsHits(index: String): String = { + val allDocuments = elasticDocuments.getAll + val hits = allDocuments + .map { doc => + s""" + |{ + | "_index": "$index", + | "_type": "_doc", + | "_id": "${doc._1}", + | "_score": 1.0, + | "_source": ${serialization.write(doc._2)(formats)} + |} + |""".stripMargin + } + .mkString(",") + s""" + |{ + | "took": 1, + | "timed_out": false, + | "_shards": { + | "total": 1, + | "successful": 1, + | "skipped": 0, + | "failed": 0 + | }, + | "hits": { + | "total": { + | "value": ${allDocuments.keys.size}, + | "relation": "eq" + | }, + | "max_score": 1.0, + | "hits": [ + | $hits + | ] + | } + |} + |""".stripMargin + } - override def setReplicas(index: String, replicas: Int): Boolean = true + // ==================== Closeable ==================== - override def updateSettings(index: String, settings: String) = true + override def close(): Unit = () - override def addAlias(index: String, alias: String): Boolean = true + // ==================== VersionApi ==================== - /** Remove an alias from the given index. 
- * - * @param index - * - the name of the index - * @param alias - * - the name of the alias - * @return - * true if the alias was removed successfully, false otherwise - */ - override def removeAlias(index: String, alias: String): Boolean = true + override private[client] def executeVersion(): ElasticResult[String] = + ElasticResult.success(elasticVersion) - override def createIndex(index: String, settings: String): Boolean = true + // ==================== IndicesApi ==================== - override def setMapping(index: String, mapping: String): Boolean = true + override private[client] def executeCreateIndex( + index: String, + settings: String + ): ElasticResult[Boolean] = + ElasticResult.success(true) - override def deleteIndex(index: String): Boolean = true + override private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = + ElasticResult.success(true) - override def closeIndex(index: String): Boolean = true + override private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = + ElasticResult.success(true) - override def openIndex(index: String): Boolean = true + override private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = + ElasticResult.success(true) - /** Reindex from source index to target index. - * - * @param sourceIndex - * - the name of the source index - * @param targetIndex - * - the name of the target index - * @param refresh - * - true to refresh the target index after reindexing, false otherwise - * @return - * true if the reindexing was successful, false otherwise - */ - override def reindex(sourceIndex: String, targetIndex: String, refresh: Boolean = true): Boolean = - true + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = + ElasticResult.success((true, Some(elasticDocuments.getAll.keys.size))) - /** Check if an index exists. 
- * - * @param index - * - the name of the index to check - * @return - * true if the index exists, false otherwise - */ - override def indexExists(index: String): Boolean = false + override private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = + ElasticResult.success(true) - override def count(jsonQuery: JSONQuery): Option[Double] = - throw new UnsupportedOperationException + // ==================== AliasApi ==================== - override def get[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], formats: Formats): Option[U] = - elasticDocuments.get(id).asInstanceOf[Option[U]] + override private[client] def executeAddAlias( + index: String, + alias: String + ): ElasticResult[Boolean] = + ElasticResult.success(true) - override def search[U](sqlQuery: SQLQuery)(implicit m: Manifest[U], formats: Formats): List[U] = - elasticDocuments.getAll.toList.asInstanceOf[List[U]] + override private[client] def executeRemoveAlias( + index: String, + alias: String + ): ElasticResult[Boolean] = + ElasticResult.success(true) - override def multiSearch[U]( - jsonQueries: JSONQueries - )(implicit m: Manifest[U], formats: Formats): List[List[U]] = - throw new UnsupportedOperationException + override private[client] def executeAliasExists(alias: String): ElasticResult[Boolean] = + ElasticResult.success(true) - override def index(index: String, id: String, source: String): Boolean = - throw new UnsupportedOperationException + override private[client] def executeGetAliases(index: String): ElasticResult[String] = + ElasticResult.success("{}") + + override private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): ElasticResult[Boolean] = + ElasticResult.success(true) + + // ==================== SettingsApi ==================== + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = + ElasticResult.success(true) + + override private[client] def executeLoadSettings(index: String): ElasticResult[String] = { + ElasticResult.success( + s"""{"$index":{"settings":{"index":{"number_of_shards":"1","number_of_replicas":"1"}}}}""" + ) + } + + // ==================== MappingApi ==================== - override def update[U <: Timestamped]( - entity: U, - index: Option[String] = None, - maybeType: Option[String] = None, - upsert: Boolean = true - )(implicit u: ClassTag[U], formats: Formats): Boolean = { - elasticDocuments.createOrUpdate(entity) - true + override private[client] def executeSetMapping( + index: String, + mapping: String + ): ElasticResult[Boolean] = + ElasticResult.success(true) + + override private[client] def executeGetMapping(index: String): ElasticResult[String] = { + ElasticResult.success(s"""{"$index":{"mappings":{}}}""") } - override def update( + // ==================== RefreshApi ==================== + + override private[client] def executeRefresh(index: String): ElasticResult[Boolean] = + ElasticResult.success(true) + + // ==================== FlushApi ==================== + + override private[client] def executeFlush( + index: String, + force: Boolean, + wait: Boolean + ): ElasticResult[Boolean] = + ElasticResult.success(true) + + // ==================== IndexApi ==================== + + override private[client] def executeIndex( + index: String, + id: String, + source: String + ): ElasticResult[Boolean] = + ElasticResult.success { + elasticDocuments.createOrUpdate(serialization.read(source), 
id) + true + } + + override private[client] def executeIndexAsync( + index: String, + id: String, + source: String + )(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = + Future { + executeIndex(index, id, source) + } + + // ==================== UpdateApi ==================== + + override private[client] def executeUpdate( index: String, id: String, source: String, upsert: Boolean - ): Boolean = { - logger.warn(s"MockElasticClient - $id not updated for $source") - false - } + ): ElasticResult[Boolean] = + ElasticResult.success { + elasticDocuments.createOrUpdate(serialization.read(source), id) + true + } - override def delete(uuid: String, index: String): Boolean = { - if (elasticDocuments.get(uuid).isDefined) { - elasticDocuments.delete(uuid) + override private[client] def executeUpdateAsync( + index: String, + id: String, + source: String, + upsert: Boolean + )(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = + Future { + executeUpdate(index, id, source, upsert) + } + + // ==================== DeleteApi ==================== + + override private[client] def executeDelete( + index: String, + id: String + ): ElasticResult[Boolean] = + ElasticResult.success(if (elasticDocuments.get(id).isDefined) { + elasticDocuments.delete(id) true } else { false + }) + + override private[client] def executeDeleteAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = + Future { + executeDelete(index, id) } - } - override def refresh(index: String): Boolean = true + // ==================== GetApi ==================== - override def flush(index: String, force: Boolean, wait: Boolean): Boolean = true + override private[client] def executeGet( + index: String, + id: String + ): ElasticResult[Option[String]] = + elasticDocuments.get(id) match { + case Some(doc) => ElasticResult.success(Some(serialization.write(doc)(formats))) + case None => ElasticResult.success(None) + } - override type A = this.type + override private[client] def executeGetAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[String]]] = + Future { + executeGet(index, id) + } - override def bulk(implicit - bulkOptions: BulkOptions, - system: ActorSystem - ): Flow[Seq[A], R, NotUsed] = - throw new UnsupportedOperationException + // ==================== CountApi ==================== - override def bulkResult: Flow[R, Set[String], NotUsed] = - throw new UnsupportedOperationException + override private[client] def executeCount(query: ElasticQuery): ElasticResult[Option[Double]] = + ElasticResult.success( + Some(elasticDocuments.getAll.keys.size.toDouble) + ) - override type R = this.type + override private[client] def executeCountAsync(query: ElasticQuery)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[Double]]] = + Future { + executeCount(query) + } - override def toBulkAction(bulkItem: BulkItem): A = - throw new UnsupportedOperationException + // ==================== SearchApi ==================== - override implicit def toBulkElasticAction(a: A): BulkElasticAction = - throw new UnsupportedOperationException + override private[client] implicit def sqlSearchRequestToJsonQuery( + sqlSearch: SQLSearchRequest + ): String = + """{ + | "query": { + | "match_all": {} + | } + |}""".stripMargin - override implicit def toBulkElasticResult(r: R): BulkElasticResult = - throw new UnsupportedOperationException + override private[client] def executeSingleSearch( + elasticQuery: ElasticQuery + ): 
ElasticResult[Option[String]] = + ElasticResult.success( + Some( + allDocumentsAsHits( + elasticQuery.indices.headOption.getOrElse("default_index") + ) + ) + ) + + override private[client] def executeMultiSearch( + elasticQueries: ElasticQueries + ): ElasticResult[Option[String]] = + ElasticResult.success( + Some( + allDocumentsAsHits( + elasticQueries.queries.head.indices.headOption.getOrElse("default_index") + ) + ) + ) + + override private[client] def executeSingleSearchAsync(elasticQuery: ElasticQuery)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[String]]] = + Future { + executeSingleSearch(elasticQuery) + } + + override private[client] def executeMultiSearchAsync(elasticQueries: ElasticQueries)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[String]]] = + Future { + executeMultiSearch(elasticQueries) + } + + // ==================== ScrollApi ==================== + + override private[client] def scrollClassic( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation], + config: ScrollConfig + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = + Source.single(elasticDocuments.getAll).mapConcat(_.values.toList) - override def multiSearchWithInnerHits[U, I](jsonQueries: JSONQueries, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[List[(U, List[I])]] = List.empty + override private[client] def searchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = + scrollClassic( + elasticQuery, + fieldAliases, + Map.empty, + config + ) - override def search[U](jsonQuery: JSONQuery)(implicit m: Manifest[U], formats: Formats): List[U] = - List.empty + // ==================== BulkApi ==================== - override def searchWithInnerHits[U, I](jsonQuery: JSONQuery, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[(U, List[I])] = List.empty + override type BulkActionType = this.type - override def getMapping(index: String): String = + override type BulkResultType = this.type + + override private[client] def toBulkAction(bulkItem: BulkItem): BulkActionType = throw new UnsupportedOperationException - override def aggregate(sqlQuery: SQLQuery)(implicit - ec: ExecutionContext - ): Future[Seq[SingleValueAggregateResult]] = + override private[client] implicit def toBulkElasticAction(a: BulkActionType): BulkElasticAction = + throw new UnsupportedOperationException + + /** Basic flow for executing a bulk action. This method must be implemented by concrete classes + * depending on the Elasticsearch version and client used. + * + * @param bulkOptions + * configuration options + * @return + * Flow transforming bulk actions into results + */ + override private[client] def bulkFlow(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Flow[Seq[BulkActionType], BulkResultType, NotUsed] = + throw new UnsupportedOperationException + + /** Convert a BulkResultType into individual results. This method must extract the successes and + * failures from the ES response. 
+ * + * @param result + * raw result from the bulk + * @return + * sequence of Right(id) for success or Left(failed) for failure + */ + override private[client] def extractBulkResults( + result: BulkResultType, + originalBatch: Seq[BulkItem] + ): Seq[Either[FailedDocument, SuccessfulDocument]] = throw new UnsupportedOperationException - override def loadSettings(index: String): String = + /** Conversion BulkActionType -> BulkItem */ + override private[client] def actionToBulkItem(action: BulkActionType): BulkItem = throw new UnsupportedOperationException } trait ElasticDocuments { - private[this] var documents: Map[String, Timestamped] = Map() + private[this] var documents: Map[String, Map[String, AnyRef]] = Map() - def createOrUpdate(entity: Timestamped): Unit = { - documents = documents.updated(entity.uuid, entity) + def createOrUpdate(entity: Map[String, AnyRef], uuid: String): Unit = { + documents = documents.updated(uuid, entity) } def delete(uuid: String): Unit = { documents = documents - uuid } - def getAll: Iterable[Timestamped] = documents.values + def getAll: Map[String, Map[String, AnyRef]] = documents - def get(uuid: String): Option[Timestamped] = documents.get(uuid) + def get(uuid: String): Option[Map[String, AnyRef]] = documents.get(uuid) } diff --git a/es9/java/src/test/scala/app/softnetwork/elastic/persistence/person/ElasticsearchClientPersonHandlerSpec.scala b/core/testkit/src/main/scala/app/softnetwork/elastic/persistence/person/ElasticClientPersonHandlerSpec.scala similarity index 64% rename from es9/java/src/test/scala/app/softnetwork/elastic/persistence/person/ElasticsearchClientPersonHandlerSpec.scala rename to core/testkit/src/main/scala/app/softnetwork/elastic/persistence/person/ElasticClientPersonHandlerSpec.scala index 9d758f76..df1e41e0 100644 --- a/es9/java/src/test/scala/app/softnetwork/elastic/persistence/person/ElasticsearchClientPersonHandlerSpec.scala +++ b/core/testkit/src/main/scala/app/softnetwork/elastic/persistence/person/ElasticClientPersonHandlerSpec.scala @@ -1,7 +1,6 @@ package app.softnetwork.elastic.persistence.person import akka.actor.typed.ActorSystem -import app.softnetwork.elastic.client.java.ElasticsearchClientApi import app.softnetwork.elastic.persistence.query.{ElasticProvider, PersonToElasticProcessorStream} import app.softnetwork.persistence.ManifestWrapper import app.softnetwork.persistence.person.model.Person @@ -10,26 +9,22 @@ import app.softnetwork.persistence.query.ExternalPersistenceProvider import com.typesafe.config.Config import org.slf4j.{Logger, LoggerFactory} -import scala.concurrent.ExecutionContextExecutor - -class ElasticsearchClientPersonHandlerSpec extends ElasticPersonTestKit { - - implicit val ec: ExecutionContextExecutor = typedSystem().executionContext +trait ElasticClientPersonHandlerSpec extends ElasticPersonTestKit { override def externalPersistenceProvider: ExternalPersistenceProvider[Person] = - new ElasticProvider[Person] with ElasticsearchClientApi with ManifestWrapper[Person] { + new ElasticProvider[Person] with ManifestWrapper[Person] { override protected val manifestWrapper: ManifestW = ManifestW() - override lazy val config: Config = ElasticsearchClientPersonHandlerSpec.this.elasticConfig + override lazy val config: Config = ElasticClientPersonHandlerSpec.this.elasticConfig } override def person2ExternalProcessorStream: ActorSystem[_] => PersonToExternalProcessorStream = sys => - new PersonToElasticProcessorStream with ElasticsearchClientApi { + new PersonToElasticProcessorStream { override val forTests: 
Boolean = true
        override protected val manifestWrapper: ManifestW = ManifestW()
        override implicit def system: ActorSystem[_] = sys
        override def log: Logger = LoggerFactory getLogger getClass.getName
-       override lazy val config: Config = ElasticsearchClientPersonHandlerSpec.this.elasticConfig
+       override lazy val config: Config = ElasticClientPersonHandlerSpec.this.elasticConfig
      }

  override def log: Logger = LoggerFactory getLogger getClass.getName
diff --git a/core/testkit/src/main/scala/app/softnetwork/elastic/persistence/query/PersonToElasticProcessorStream.scala b/core/testkit/src/main/scala/app/softnetwork/elastic/persistence/query/PersonToElasticProcessorStream.scala
index 2c38842b..d2890998 100644
--- a/core/testkit/src/main/scala/app/softnetwork/elastic/persistence/query/PersonToElasticProcessorStream.scala
+++ b/core/testkit/src/main/scala/app/softnetwork/elastic/persistence/query/PersonToElasticProcessorStream.scala
@@ -11,4 +11,4 @@ trait PersonToElasticProcessorStream
     with PersonToExternalProcessorStream
     with InMemoryJournalProvider
     with InMemoryOffsetProvider
-    with ElasticProvider[Person] { _: ElasticClientApi => }
+    with ElasticProvider[Person]
diff --git a/core/testkit/src/test/resources/reference.conf b/core/testkit/src/test/resources/reference.conf
index ba8abfad..2b056f6a 100644
--- a/core/testkit/src/test/resources/reference.conf
+++ b/core/testkit/src/test/resources/reference.conf
@@ -1,3 +1,3 @@
 akka.coordinated-shutdown.exit-jvm = off
-elastic.multithreaded = false
+elastic.multithreaded = true
 clustering.port = 0
diff --git a/documentation/client/README.md b/documentation/client/README.md
new file mode 100644
index 00000000..fc37f028
--- /dev/null
+++ b/documentation/client/README.md
@@ -0,0 +1,20 @@
+# Client Engine Documentation
+
+Welcome to the Client Engine Documentation. Navigate through the sections below:
+
+- [Client Common Principles](common_principles.md)
+- [Version Information](version.md)
+- [Flush Index](flush.md)
+- [Refresh Index](refresh.md)
+- [Indexes Management](indices.md)
+- [Settings Management](settings.md)
+- [Alias Management](aliases.md)
+- [Mappings Management](mappings.md)
+- [Index Documents](index.md)
+- [Update Documents](update.md)
+- [Delete Documents](delete.md)
+- [Bulk Operations](bulk.md)
+- [Get Documents](get.md)
+- [Search Documents](search.md)
+- [Scroll Search](scroll.md)
+- [Aggregations](aggregations.md)
diff --git a/documentation/client/aggregations.md b/documentation/client/aggregations.md
new file mode 100644
index 00000000..161d39e7
--- /dev/null
+++ b/documentation/client/aggregations.md
@@ -0,0 +1,1286 @@
+[Back to index](README.md)
+
+# AGGREGATE API
+
+## Overview
+
+The **Aggregate API** provides a powerful and type-safe way to execute aggregations on Elasticsearch data using SQL queries. It automatically extracts and converts aggregation results into strongly-typed Scala values with comprehensive error handling.
+
+**Key Features:**
+- **SQL-based aggregations** (AVG, SUM, COUNT, MIN, MAX, etc.)
+- **Type-safe result extraction** with safe casting methods
+- **Multiple value types** (Boolean, Numeric, String, Temporal, Object, Arrays)
+- **Error handling** for each aggregation
+- **Pattern matching support** with `fold` method
+- **Pretty printing** for debugging
+- **Automatic type conversion** from Elasticsearch responses
+
+**Dependencies:**
+- Requires `SearchApi` for query execution
+- Requires `ElasticConversion` for response parsing
+
+**Roadmap:**
+- Support for multi-value aggregations (currently single-value only)
+
+---
+
+## Table of Contents
+
+1. [Core Concepts](#core-concepts)
+2. [SingleValueAggregateResult](#singlevalueaggregateresult)
+3. [Basic Usage](#basic-usage)
+4. [Safe Value Extraction](#safe-value-extraction)
+5. [Pattern Matching with fold](#pattern-matching-with-fold)
+6. [Error Handling](#error-handling)
+7. [Pretty Printing](#pretty-printing)
+8. [Testing](#testing)
+9. [Best Practices](#best-practices)
+
+---
+
+## Core Concepts
+
+### Aggregation Result Types
+
+The API uses a sealed trait structure to represent different types of aggregation results:
+
+```scala
+// Base trait for all aggregation results
+sealed trait AggregateResult {
+  def field: String
+  def error: Option[String]
+}
+
+// Metric aggregations (single values)
+sealed trait MetricAggregateResult extends AggregateResult {
+  def aggType: AggregationType.AggregationType
+}
+
+// Single value result (AVG, SUM, MIN, MAX, COUNT, etc.)
+case class SingleValueAggregateResult(
+  field: String,
+  aggType: AggregationType.AggregationType,
+  value: AggregateValue,
+  error: Option[String] = None
+) extends MetricAggregateResult
+```
+
+---
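+As a quick taste of where these result types come from (see [Basic Usage](#basic-usage) for the full API), the sketch below is modeled on this project's test suite. It is illustrative only: `client` stands for any `ElasticClientApi` implementation, the index and field names are placeholders, and `.complete()` is the helper the test kit uses to await the future.
+
+```scala
+import scala.concurrent.ExecutionContext.Implicits.global
+import scala.util.{Failure, Success}
+
+// Run a SQL aggregation: the outcome is a sequence of
+// SingleValueAggregateResult values wrapped in an ElasticResult.
+client.aggregate("select count(distinct p.uuid) as c from person p").complete() match {
+  case Success(results) =>
+    // Unwrap the ElasticResult and read the first single-value aggregate.
+    val count = results.get.headOption
+      .flatMap(_.asDoubleSafe.toOption)
+      .getOrElse(0d)
+    println(s"count = $count")
+  case Failure(f) =>
+    println(s"aggregation failed: ${f.getMessage}")
+}
+```
+
+---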
+- **Type-safe result extraction** with safe casting methods
+- **Multiple value types** (Boolean, Numeric, String, Temporal, Object, Arrays)
+- **Error handling** for each aggregation
+- **Pattern matching support** with the `fold` method
+- **Pretty printing** for debugging
+- **Automatic type conversion** from Elasticsearch responses
+
+**Dependencies:**
+- Requires `SearchApi` for query execution
+- Requires `ElasticConversion` for response parsing
+
+**Roadmap:**
+- Support for multi-value aggregations (currently single-value only)
+
+---
+
+## Table of Contents
+
+1. [Core Concepts](#core-concepts)
+2. [SingleValueAggregateResult](#singlevalueaggregateresult)
+3. [Basic Usage](#basic-usage)
+4. [Safe Value Extraction](#safe-value-extraction)
+5. [Pattern Matching with fold](#pattern-matching-with-fold)
+6. [Error Handling](#error-handling)
+7. [Pretty Printing](#pretty-printing)
+8. [Testing](#testing)
+9. [Best Practices](#best-practices)
+
+---
+
+## Core Concepts
+
+### Aggregation Result Types
+
+The API uses a sealed trait structure to represent different types of aggregation results:
+
+```scala
+// Base trait for all aggregation results
+sealed trait AggregateResult {
+  def field: String
+  def error: Option[String]
+}
+
+// Metric aggregations (single values)
+sealed trait MetricAggregateResult extends AggregateResult {
+  def aggType: AggregationType.AggregationType
+}
+
+// Single value result (AVG, SUM, MIN, MAX, COUNT, etc.)
+case class SingleValueAggregateResult(
+  field: String,
+  aggType: AggregationType.AggregationType,
+  value: AggregateValue,
+  error: Option[String] = None
+) extends MetricAggregateResult
+```
+
+---
+
+### Value Types
+
+The API supports multiple value types through the `AggregateValue` sealed trait:
+
+```scala
+sealed trait AggregateValue
+
+// ============================================================
+// SCALAR VALUES
+// ============================================================
+
+// Boolean value
+case class BooleanValue(value: Boolean) extends AggregateValue
+
+// Numeric value (supports all numeric types)
+case class NumericValue(value: Number) extends AggregateValue
+
+// String value
+case class StringValue(value: String) extends AggregateValue
+
+// Temporal value (dates, timestamps, etc.)
+case class TemporalValue(value: Temporal) extends AggregateValue + +// Object/Map value +case class ObjectValue(value: Map[String, Any]) extends AggregateValue + +// ============================================================ +// ARRAY VALUES +// ============================================================ + +sealed trait ArrayAggregateValue[T] extends AggregateValue { + def value: Seq[T] +} + +case class ArrayOfBooleanValue(value: Seq[Boolean]) + extends ArrayAggregateValue[Boolean] + +case class ArrayOfNumericValue(value: Seq[Number]) + extends ArrayAggregateValue[Number] + +case class ArrayOfStringValue(value: Seq[String]) + extends ArrayAggregateValue[String] + +case class ArrayOfTemporalValue(value: Seq[Temporal]) + extends ArrayAggregateValue[Temporal] + +case class ArrayOfObjectValue(value: Seq[Map[String, Any]]) + extends ArrayAggregateValue[Map[String, Any]] + +// ============================================================ +// EMPTY VALUE +// ============================================================ + +case object EmptyValue extends AggregateValue +``` + +**Value Type Matrix:** + +| Type | Class | Example | Use Case | +|-----------------|------------------------|------------------------|-----------------------------| +| Boolean | `BooleanValue` | `true`, `false` | Boolean aggregations | +| Numeric | `NumericValue` | `42`, `3.14`, `100L` | AVG, SUM, MIN, MAX, COUNT | +| String | `StringValue` | `"electronics"` | Category names, labels | +| Temporal | `TemporalValue` | `2024-01-15T10:30:00Z` | Date aggregations | +| Object | `ObjectValue` | `Map("count" -> 10)` | Complex nested results | +| Array[Boolean] | `ArrayOfBooleanValue` | `Seq(true, false)` | Multi-valued boolean fields | +| Array[Numeric] | `ArrayOfNumericValue` | `Seq(1, 2, 3)` | Multi-valued numeric fields | +| Array[String] | `ArrayOfStringValue` | `Seq("a", "b")` | Multi-valued string fields | +| Array[Temporal] | `ArrayOfTemporalValue` | `Seq(date1, date2)` | Multi-valued date fields | +| Array[Object] | `ArrayOfObjectValue` | `Seq(map1, map2)` | Multi-valued object fields | +| Empty | `EmptyValue` | `null` | No data available | + +--- + +## SingleValueAggregateResult + +### Structure + +The `SingleValueAggregateResult` represents a single aggregated value from an Elasticsearch query. + +```scala +case class SingleValueAggregateResult( + field: String, // Field name being aggregated + aggType: AggregationType.AggregationType, // Type of aggregation (AVG, SUM, etc.) + value: AggregateValue, // The aggregated value + error: Option[String] = None // Optional error message +) extends MetricAggregateResult +``` + +**Fields:** + +| Field | Type | Description | +|-----------|-------------------|-----------------------------------------------------------------| +| `field` | `String` | Name of the aggregated field (e.g., "avg_price", "total_count") | +| `aggType` | `AggregationType` | Type of aggregation performed | +| `value` | `AggregateValue` | The actual aggregated value (typed) | +| `error` | `Option[String]` | Error message if aggregation failed | + +--- + +### Properties and Methods + +```scala +val result: SingleValueAggregateResult = // ... 
from aggregation + +// ============================================================ +// CHECK METHODS +// ============================================================ + +// Check if the result is empty +val isEmpty: Boolean = result.isEmpty + +// Check if the result has an error +val hasError: Boolean = result.hasError + +// ============================================================ +// SAFE EXTRACTION METHODS +// ============================================================ + +// Extract as Boolean +val boolResult: Try[Boolean] = result.asBooleanSafe + +// Extract as Number +val numResult: Try[Number] = result.asNumericSafe + +// Extract as Double +val doubleResult: Try[Double] = result.asDoubleSafe + +// Extract as Int +val intResult: Try[Int] = result.asIntSafe + +// Extract as Long +val longResult: Try[Long] = result.asLongSafe + +// Extract as Byte +val byteResult: Try[Byte] = result.asByteSafe + +// Extract as Short +val shortResult: Try[Short] = result.asShortSafe + +// Extract as String +val strResult: Try[String] = result.asStringSafe + +// Extract as Temporal +val tempResult: Try[Temporal] = result.asTemporalSafe + +// Extract as Map +val mapResult: Try[Map[String, Any]] = result.asMapSafe + +// Extract as Sequence +val seqResult: Try[Seq[Any]] = result.asSeqSafe + +// ============================================================ +// UTILITY METHODS +// ============================================================ + +// Get value with default +val value: Double = result.getOrElse(0.0) { + case NumericValue(n) => Some(n.doubleValue()) + case _ => None +} + +// Pattern matching with fold +val formatted: String = result.fold( + onBoolean = b => s"Boolean: $b", + onNumeric = n => s"Number: $n", + onString = s => s"String: $s", + onTemporal = t => s"Temporal: $t", + onObject = o => s"Object: $o", + onMulti = m => s"Multi: $m", + onEmpty = "Empty" +) + +// Pretty print for debugging +val prettyString: String = result.prettyPrint +``` + +--- + +## Basic Usage + +### Simple Aggregation + +```scala +import scala.concurrent.ExecutionContext.Implicits.global + +// SQL query with aggregation +val avgPriceQuery = SQLQuery( + query = """ + SELECT AVG(price) as avg_price + FROM products + WHERE category = 'electronics' + """ +) + +// Execute aggregation +client.aggregate(avgPriceQuery).foreach { + case ElasticSuccess(results) => + results.foreach { result => + println(s"Field: ${result.field}") + println(s"Type: ${result.aggType}") + + // Safe extraction + result.asDoubleSafe match { + case Success(avgPrice) => + println(f"Average price: $$${avgPrice}%.2f") + case Failure(ex) => + println(s"Failed to extract: ${ex.getMessage}") + } + } + + case ElasticFailure(error) => + println(s"❌ Aggregation failed: ${error.message}") +} +``` + +--- + +### Multiple Aggregations + +```scala +// SQL query with multiple aggregations +val statsQuery = SQLQuery( + query = """ + SELECT + AVG(price) as avg_price, + MIN(price) as min_price, + MAX(price) as max_price, + SUM(price) as total_value, + COUNT(*) as product_count + FROM products + WHERE category = 'electronics' + """ +) + +client.aggregate(statsQuery).foreach { + case ElasticSuccess(results) => + println("Product Statistics:") + + results.foreach { result => + result.field match { + case "avg_price" => + result.asDoubleSafe.foreach(v => println(f" Average: $$${v}%.2f")) + + case "min_price" => + result.asDoubleSafe.foreach(v => println(f" Minimum: $$${v}%.2f")) + + case "max_price" => + result.asDoubleSafe.foreach(v => println(f" Maximum: $$${v}%.2f")) + + 
case "total_value" => + result.asDoubleSafe.foreach(v => println(f" Total Value: $$${v}%,.2f")) + + case "product_count" => + result.asLongSafe.foreach(v => println(s" Count: $v products")) + + case _ => + println(s" ${result.field}: ${result.prettyPrint}") + } + } + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} +``` + +--- + +## Safe Value Extraction + +### Type-Safe Methods + +All extraction methods return `Try[T]` for safe error handling: + +```scala +val result: SingleValueAggregateResult = // ... from aggregation + +// ============================================================ +// BOOLEAN EXTRACTION +// ============================================================ + +result.asBooleanSafe match { + case Success(bool) => println(s"Boolean: $bool") + case Failure(ex) => println(s"Not a boolean: ${ex.getMessage}") +} + +// ============================================================ +// NUMERIC EXTRACTIONS +// ============================================================ + +// As generic Number +result.asNumericSafe match { + case Success(num) => println(s"Number: $num") + case Failure(ex) => println(s"Not a number: ${ex.getMessage}") +} + +// As Double +result.asDoubleSafe match { + case Success(d) => println(f"Double: $d%.2f") + case Failure(ex) => println(s"Not a double: ${ex.getMessage}") +} + +// As Int +result.asIntSafe match { + case Success(i) => println(s"Int: $i") + case Failure(ex) => println(s"Not an int: ${ex.getMessage}") +} + +// As Long +result.asLongSafe match { + case Success(l) => println(s"Long: $l") + case Failure(ex) => println(s"Not a long: ${ex.getMessage}") +} + +// As Byte +result.asByteSafe match { + case Success(b) => println(s"Byte: $b") + case Failure(ex) => println(s"Not a byte: ${ex.getMessage}") +} + +// As Short +result.asShortSafe match { + case Success(s) => println(s"Short: $s") + case Failure(ex) => println(s"Not a short: ${ex.getMessage}") +} + +// ============================================================ +// STRING EXTRACTION +// ============================================================ + +result.asStringSafe match { + case Success(str) => println(s"String: $str") + case Failure(ex) => println(s"Not a string: ${ex.getMessage}") +} + +// ============================================================ +// TEMPORAL EXTRACTION +// ============================================================ + +result.asTemporalSafe match { + case Success(temporal) => println(s"Temporal: $temporal") + case Failure(ex) => println(s"Not a temporal: ${ex.getMessage}") +} + +// ============================================================ +// MAP EXTRACTION +// ============================================================ + +result.asMapSafe match { + case Success(map) => + println("Map:") + map.foreach { case (k, v) => println(s" $k: $v") } + case Failure(ex) => println(s"Not a map: ${ex.getMessage}") +} + +// ============================================================ +// SEQUENCE EXTRACTION +// ============================================================ + +result.asSeqSafe match { + case Success(seq) => + println(s"Sequence with ${seq.size} elements:") + seq.foreach(println) + case Failure(ex) => println(s"Not a sequence: ${ex.getMessage}") +} +``` + +--- + +### getOrElse Method + +Extract values with default fallback: + +```scala +val result: SingleValueAggregateResult = // ... 
from aggregation + +// Extract Double with default +val avgPrice: Double = result.getOrElse(0.0) { + case NumericValue(n) => Some(n.doubleValue()) + case _ => None +} + +// Extract Int with default +val productCount: Int = result.getOrElse(0) { + case NumericValue(n) => Some(n.intValue()) + case _ => None +} + +// Extract String with default +val categoryName: String = result.getOrElse("Unknown") { + case StringValue(s) => Some(s) + case _ => None +} + +// Extract Boolean with default +val isActive: Boolean = result.getOrElse(false) { + case BooleanValue(b) => Some(b) + case _ => None +} + +// Extract Sequence with default +val prices: Seq[Double] = result.getOrElse(Seq.empty[Double]) { + case ArrayOfNumericValue(nums) => Some(nums.map(_.doubleValue())) + case _ => None +} + +// Extract Map with default +val metadata: Map[String, Any] = result.getOrElse(Map.empty[String, Any]) { + case ObjectValue(m) => Some(m) + case _ => None +} +``` + +--- + +## Pattern Matching with fold + +### Basic fold Usage + +The `fold` method provides exhaustive pattern matching for all value types: + +```scala +val result: SingleValueAggregateResult = // ... from aggregation + +val output: String = result.fold( + onBoolean = bool => s"Boolean value: $bool", + onNumeric = num => f"Numeric value: ${num.doubleValue()}%.2f", + onString = str => s"String value: '$str'", + onTemporal = temp => s"Temporal value: $temp", + onObject = obj => s"Object with ${obj.size} fields", + onMulti = seq => s"Array with ${seq.size} elements", + onEmpty = "No value" +) + +println(output) +``` + +--- + +### Advanced fold Patterns + +```scala +// ============================================================ +// CALCULATE TOTAL +// ============================================================ + +def calculateTotal(result: SingleValueAggregateResult): Double = { + result.fold( + onBoolean = _ => 0.0, + onNumeric = num => num.doubleValue(), + onString = _ => 0.0, + onTemporal = _ => 0.0, + onObject = _ => 0.0, + onMulti = seq => seq.collect { case n: Number => n.doubleValue() }.sum, + onEmpty = 0.0 + ) +} + +// ============================================================ +// FORMAT VALUE FOR DISPLAY +// ============================================================ + +def formatValue(result: SingleValueAggregateResult): String = { + result.fold( + onBoolean = bool => if (bool) "✓" else "✗", + onNumeric = num => f"${num.doubleValue()}%,.2f", + onString = str => s"\"$str\"", + onTemporal = temp => temp.toString, + onObject = obj => obj.map { case (k, v) => s"$k=$v" }.mkString("{", ", ", "}"), + onMulti = seq => seq.mkString("[", ", ", "]"), + onEmpty = "N/A" + ) +} + +// ============================================================ +// CONVERT TO JSON +// ============================================================ + +def toJson(result: SingleValueAggregateResult): String = { + val valueJson = result.fold( + onBoolean = bool => bool.toString, + onNumeric = num => num.toString, + onString = str => s"\"$str\"", + onTemporal = temp => s"\"$temp\"", + onObject = obj => obj.map { case (k, v) => s"\"$k\":\"$v\"" }.mkString("{", ",", "}"), + onMulti = seq => seq.map { + case s: String => s"\"$s\"" + case other => other.toString + }.mkString("[", ",", "]"), + onEmpty = "null" + ) + + s"""{"field":"${result.field}","type":"${result.aggType}","value":$valueJson}""" +} + +// ============================================================ +// VALIDATE VALUE +// ============================================================ + +def validateValue(result: 
SingleValueAggregateResult): Either[String, Any] = { + result.fold( + onBoolean = bool => Right(bool), + onNumeric = num => { + val d = num.doubleValue() + if (d.isNaN || d.isInfinite) Left("Invalid numeric value") + else Right(d) + }, + onString = str => { + if (str.trim.isEmpty) Left("Empty string") + else Right(str) + }, + onTemporal = temp => Right(temp), + onObject = obj => { + if (obj.isEmpty) Left("Empty object") + else Right(obj) + }, + onMulti = seq => { + if (seq.isEmpty) Left("Empty array") + else Right(seq) + }, + onEmpty = Left("No value present") + ) +} + +// ============================================================ +// TYPE CONVERSION +// ============================================================ + +def convertToString(result: SingleValueAggregateResult): String = { + result.fold( + onBoolean = _.toString, + onNumeric = _.toString, + onString = identity, + onTemporal = _.toString, + onObject = _.toString, + onMulti = _.mkString(", "), + onEmpty = "" + ) +} +``` + +--- + +## Error Handling + +### Checking for Errors + +```scala +client.aggregate(sqlQuery).foreach { + case ElasticSuccess(results) => + results.foreach { result => + if (result.hasError) { + println(s"⚠️ Error in ${result.field}:") + println(s" ${result.error.getOrElse("Unknown error")}") + } else if (result.isEmpty) { + println(s"ℹ️ ${result.field}: No data") + } else { + println(s"✅ ${result.field}: ${result.prettyPrint}") + } + } + + case ElasticFailure(error) => + println(s"❌ Aggregation failed: ${error.message}") +} +``` + +--- + +### Handling Individual Result Errors + +```scala +def processAggregationResult(result: SingleValueAggregateResult): Unit = { + result.error match { + case Some(errorMsg) => + println(s"❌ Aggregation '${result.field}' failed:") + println(s" Error: $errorMsg") + println(s" Type: ${result.aggType}") + + case None if result.isEmpty => + println(s"ℹ️ Aggregation '${result.field}' returned no data") + + case None => + println(s"✅ ${result.prettyPrint}") + + // Process the value + result.asDoubleSafe match { + case Success(value) => + println(f" Processed value: $value%.2f") + case Failure(ex) => + println(s" ⚠️ Type conversion failed: ${ex.getMessage}") + } + } +} + +// Usage +client.aggregate(sqlQuery).foreach { + case ElasticSuccess(results) => + results.foreach(processAggregationResult) + + case ElasticFailure(error) => + println(s"❌ Query execution failed: ${error.message}") +} +``` + +--- + +### Comprehensive Error Handler + +```scala +def handleAggregationResults( + results: Seq[SingleValueAggregateResult] +): Map[String, Either[String, Any]] = { + results.map { result => + val value = if (result.hasError) { + Left(result.error.getOrElse("Unknown error")) + } else if (result.isEmpty) { + Left("No data available") + } else { + result.fold( + onBoolean = b => Right(b), + onNumeric = n => Right(n.doubleValue()), + onString = s => Right(s), + onTemporal = t => Right(t), + onObject = o => Right(o), + onMulti = m => Right(m), + onEmpty = Left("Empty value") + ) + } + + result.field -> value + }.toMap +} + +// Usage +client.aggregate(sqlQuery).foreach { + case ElasticSuccess(results) => + val processed = handleAggregationResults(results) + + processed.foreach { + case (field, Right(value)) => + println(s"✅ $field: $value") + case (field, Left(error)) => + println(s"❌ $field: $error") + } + + case ElasticFailure(error) => + println(s"❌ Query failed: ${error.message}") +} +``` + +--- + +## Pretty Printing + +### Using prettyPrint + +```scala +client.aggregate(sqlQuery).foreach { + 
case ElasticSuccess(results) => + println("Aggregation Results:") + println("=" * 50) + + results.foreach { result => + println(result.prettyPrint) + } + + println("=" * 50) + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} + +// Example output: +// Aggregation Results: +// ================================================== +// AVG(price) = 599.99 +// MIN(price) = 29.99 +// MAX(price) = 1999.99 +// COUNT(*) = 150 +// SUM(price) = 89998.50 +// ================================================== +``` + +--- + +### Custom Formatting + +```scala +def formatAggregationReport(results: Seq[SingleValueAggregateResult]): String = { + val header = "AGGREGATION REPORT" + val separator = "=" * 60 + + val lines = results.map { result => + val status = if (result.hasError) "❌" else if (result.isEmpty) "⚠️" else "✅" + val value = if (result.hasError) { + result.error.getOrElse("Unknown error") + } else { + result.fold( + onBoolean = b => b.toString, + onNumeric = n => f"${n.doubleValue()}%,.2f", + onString = s => s""""$s"""", + onTemporal = t => t.toString, + onObject = o => s"Object(${o.size} fields)", + onMulti = m => s"Array(${m.size} elements)", + onEmpty = "No data" + ) + } + + f"$status ${result.aggType}%-10s ${result.field}%-20s = $value" + } + + Seq(separator, header, separator) ++ lines ++ Seq(separator) mkString "\n" +} + +// Usage +client.aggregate(sqlQuery).foreach { + case ElasticSuccess(results) => + println(formatAggregationReport(results)) + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} +``` + +--- + +### Tabular Format + +```scala +def formatAsTable(results: Seq[SingleValueAggregateResult]): String = { + val headers = Seq("Status", "Type", "Field", "Value", "Error") + val colWidths = Seq(8, 12, 25, 20, 30) + + def formatRow(cells: Seq[String]): String = { + cells.zip(colWidths).map { case (cell, width) => + cell.padTo(width, ' ').take(width) + }.mkString("| ", " | ", " |") + } + + val separator = colWidths.map("-" * _).mkString("+-", "-+-", "-+") + + val headerRow = formatRow(headers) + + val dataRows = results.map { result => + val status = if (result.hasError) "❌ Error" + else if (result.isEmpty) "⚠️ Empty" + else "✅ OK" + + val value = result.fold( + onBoolean = b => b.toString, + onNumeric = n => f"${n.doubleValue()}%.2f", + onString = s => s, + onTemporal = t => t.toString, + onObject = o => s"Object(${o.size})", + onMulti = m => s"Array(${m.size})", + onEmpty = "N/A" + ) + + val error = result.error.getOrElse("") + + formatRow(Seq(status, result.aggType.toString, result.field, value, error)) + } + + (Seq(separator, headerRow, separator) ++ dataRows ++ Seq(separator)).mkString("\n") +} + +// Usage +client.aggregate(sqlQuery).foreach { + case ElasticSuccess(results) => + println(formatAsTable(results)) + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} +``` + +--- + +## Testing + +### Test Basic Aggregation + +```scala +import org.scalatest.flatspec.AsyncFlatSpec +import org.scalatest.matchers.should.Matchers + +class AggregateApiSpec extends AsyncFlatSpec with Matchers { + + "AggregateApi" should "calculate average correctly" in { + val testIndex = "test-aggregation" + + for { + // Setup + _ <- client.createIndexAsync(testIndex) + _ <- client.indexAsync(testIndex, "1", """{"price": 100}""") + _ <- client.indexAsync(testIndex, "2", """{"price": 200}""") + _ <- client.indexAsync(testIndex, "3", """{"price": 150}""") + _ <- client.refreshAsync(testIndex) + + // Test + query = SQLQuery( + query = s"SELECT 
AVG(price) as avg_price FROM $testIndex" + ) + result <- client.aggregate(query) + + // Assertions + _ = result match { + case ElasticSuccess(results) => + results should not be empty + + val avgResult = results.find(_.field == "avg_price") + avgResult shouldBe defined + + avgResult.foreach { r => + r.hasError shouldBe false + r.isEmpty shouldBe false + + r.asDoubleSafe.toOption shouldBe Some(150.0) + } + + case ElasticFailure(error) => + fail(s"Aggregation failed: ${error.message}") + } + + // Cleanup + _ <- client.deleteIndexAsync(testIndex) + } yield succeed + } +} +``` + +--- + +### Test Multiple Aggregations + +```scala +"AggregateApi" should "handle multiple aggregations" in { + val testIndex = "test-multi-agg" + + for { + // Setup + _ <- client.createIndexAsync(testIndex) + _ <- Future.sequence((1 to 10).map { i => + client.indexAsync(testIndex, i.toString, s"""{"price": ${i * 10}, "stock": ${i * 5}}""") + }) + _ <- client.refreshAsync(testIndex) + + // Test + query = SQLQuery( + query = s""" + SELECT + COUNT(*) as count, + AVG(price) as avg_price, + MIN(price) as min_price, + MAX(price) as max_price, + SUM(stock) as total_stock + FROM $testIndex + """ + ) + result <- client.aggregate(query) + + // Assertions + _ = result match { + case ElasticSuccess(results) => + results should have size 5 + + val resultMap = results.map(r => r.field -> r).toMap + + // Verify COUNT + resultMap.get("count").flatMap(_.asLongSafe.toOption) shouldBe Some(10L) + + // Verify AVG + resultMap.get("avg_price").flatMap(_.asDoubleSafe.toOption) shouldBe Some(55.0) + + // Verify MIN + resultMap.get("min_price").flatMap(_.asDoubleSafe.toOption) shouldBe Some(10.0) + + // Verify MAX + resultMap.get("max_price").flatMap(_.asDoubleSafe.toOption) shouldBe Some(100.0) + + // Verify SUM + resultMap.get("total_stock").flatMap(_.asLongSafe.toOption) shouldBe Some(275L) + + case ElasticFailure(error) => + fail(s"Aggregation failed: ${error.message}") + } + + // Cleanup + _ <- client.deleteIndexAsync(testIndex) + } yield succeed +} +``` + +--- + +### Test Error Handling + +```scala +"AggregateApi" should "handle errors gracefully" in { + val testIndex = "test-agg-error" + + for { + // Setup - create index with incompatible types + _ <- client.createIndexAsync(testIndex) + _ <- client.indexAsync(testIndex, "1", """{"price": 100}""") + _ <- client.indexAsync(testIndex, "2", """{"price": "invalid"}""") + _ <- client.refreshAsync(testIndex) + + // Test + query = SQLQuery( + query = s"SELECT AVG(price) as avg_price FROM $testIndex" + ) + result <- client.aggregate(query) + + // Assertions + _ = result match { + case ElasticSuccess(results) => + results should not be empty + + // Should either have error or handle gracefully + results.foreach { r => + if (r.hasError) { + r.error shouldBe defined + r.error.get should not be empty + } + } + + case ElasticFailure(error) => + // Error is acceptable in this case + error.message should not be empty + } + + // Cleanup + _ <- client.deleteIndexAsync(testIndex) + } yield succeed +} +``` + +--- + +### Test Empty Results + +```scala +"AggregateApi" should "handle empty results" in { + val testIndex = "test-empty-agg" + + for { + // Setup - create empty index + _ <- client.createIndexAsync(testIndex) + _ <- client.refreshAsync(testIndex) + + // Test + query = SQLQuery( + query = s"SELECT AVG(price) as avg_price FROM $testIndex" + ) + result <- client.aggregate(query) + + // Assertions + _ = result match { + case ElasticSuccess(results) => + results.foreach { r => + // Should be empty or 
have appropriate value + if (r.isEmpty) { + r.value shouldBe EmptyValue + } + } + + case ElasticFailure(_) => + // Empty result is acceptable + succeed + } + + // Cleanup + _ <- client.deleteIndexAsync(testIndex) + } yield succeed +} +``` + +--- + +## Best Practices + +### 1. Always Use Safe Extraction + +```scala +// ❌ BAD: Unsafe extraction +val avgPrice = result.value.asInstanceOf[NumericValue].value.doubleValue() + +// ✅ GOOD: Safe extraction with error handling +val avgPrice = result.asDoubleSafe match { + case Success(price) => price + case Failure(ex) => + logger.error(s"Failed to extract price: ${ex.getMessage}") + 0.0 +} + +// ✅ BETTER: Using getOrElse +val avgPrice = result.getOrElse(0.0) { + case NumericValue(n) => Some(n.doubleValue()) + case _ => None +} +``` + +--- + +### 2. Check for Errors Before Processing + +```scala +// ✅ GOOD: Check errors first +results.foreach { result => + if (result.hasError) { + logger.error(s"Aggregation error in ${result.field}: ${result.error.get}") + } else if (result.isEmpty) { + logger.warn(s"No data for ${result.field}") + } else { + // Process valid result + processResult(result) + } +} +``` + +--- + +### 3. Use Pattern Matching with fold + +```scala +// ✅ GOOD: Exhaustive pattern matching +val formatted = result.fold( + onBoolean = b => s"Boolean: $b", + onNumeric = n => f"Numeric: ${n.doubleValue()}%.2f", + onString = s => s"String: $s", + onTemporal = t => s"Temporal: $t", + onObject = o => s"Object: $o", + onMulti = m => s"Multi: $m", + onEmpty = "Empty" +) +``` + +--- + +### 4. Handle Async Operations Properly + +```scala +// ✅ GOOD: Proper async handling +def getStats(category: String): Future[Option[Stats]] = { + val query = buildStatsQuery(category) + + client.aggregate(query).map { + case ElasticSuccess(results) => + extractStats(results) + + case ElasticFailure(error) => + logger.error(s"Aggregation failed: ${error.message}") + None + }.recover { + case ex: Exception => + logger.error(s"Unexpected error: ${ex.getMessage}", ex) + None + } +} +``` + +--- + +### 5. Create Type-Safe Result Extractors + +```scala +// ✅ GOOD: Type-safe extractor +object AggregationExtractor { + def extractDouble( + results: Seq[SingleValueAggregateResult], + field: String + ): Option[Double] = { + results + .find(_.field == field) + .filter(!_.hasError) + .flatMap(_.asDoubleSafe.toOption) + } + + def extractLong( + results: Seq[SingleValueAggregateResult], + field: String + ): Option[Long] = { + results + .find(_.field == field) + .filter(!_.hasError) + .flatMap(_.asLongSafe.toOption) + } + + def extractString( + results: Seq[SingleValueAggregateResult], + field: String + ): Option[String] = { + results + .find(_.field == field) + .filter(!_.hasError) + .flatMap(_.asStringSafe.toOption) + } +} + +// Usage +val avgPrice = AggregationExtractor.extractDouble(results, "avg_price") +val count = AggregationExtractor.extractLong(results, "count") +``` + +--- + +### 6. Use Descriptive Field Names + +```scala +// ❌ BAD: Unclear field names +"SELECT AVG(price) as a, COUNT(*) as c FROM products" + +// ✅ GOOD: Clear field names +"""SELECT + AVG(price) as avg_price, + COUNT(*) as product_count, + SUM(stock) as total_inventory +FROM products""" +``` + +--- + +### 7. 
Log Aggregation Queries for Debugging + +```scala +// ✅ GOOD: Log queries +def executeAggregation(query: SQLQuery): Future[ElasticResult[Seq[SingleValueAggregateResult]]] = { + logger.info(s"Executing aggregation: ${query.query}") + + val startTime = System.currentTimeMillis() + + client.aggregate(query).map { result => + val duration = System.currentTimeMillis() - startTime + logger.info(s"Aggregation completed in ${duration}ms") + + result match { + case ElasticSuccess(results) => + logger.debug(s"Got ${results.size} aggregation results") + results.foreach(r => logger.debug(r.prettyPrint)) + + case ElasticFailure(error) => + logger.error(s"Aggregation failed: ${error.message}") + } + + result + } +} +``` + +--- + +### 8. Create Reusable Aggregation Builders + +```scala +// ✅ GOOD: Reusable builders +object AggregationQueryBuilder { + def statsQuery(index: String, field: String, filters: Map[String, String] = Map.empty): SQLQuery = { + val whereClause = if (filters.nonEmpty) { + "WHERE " + filters.map { case (k, v) => s"$k = '$v'" }.mkString(" AND ") + } else "" + + SQLQuery( + query = s""" + SELECT + COUNT(*) as count, + AVG($field) as avg_value, + MIN($field) as min_value, + MAX($field) as max_value, + SUM($field) as total_value + FROM $index + $whereClause + """ + ) + } + +} + +// Usage +val priceStats = AggregationQueryBuilder.statsQuery( + "products", + "price", + Map("category" -> "electronics") +) +``` + +--- + +## Summary + +The **Aggregate API** provides: + +✅ **Type-safe aggregation results** with comprehensive value types +✅ **Safe extraction methods** with `Try` based error handling +✅ **Pattern matching support** via the `fold` method +✅ **Error handling** at both query and result levels +✅ **Pretty printing** for debugging and logging +✅ **Flexible value types** including scalars, arrays, and objects +✅ **SQL-based queries** for familiar aggregation syntax + +**Key Takeaways:** + +1. Always use safe extraction methods (`asDoubleSafe`, `asLongSafe`, etc.) +2. Check for errors before processing results +3. Use `fold` for exhaustive pattern matching +4. Handle async operations properly with Future +5. Create reusable extractors and builders +6. Log queries and results for debugging +7. Use descriptive field names in SQL queries +8. Test edge cases (empty results, errors, type mismatches) + +--- + + +--- + +[Back to index](README.md) diff --git a/documentation/client/aliases.md b/documentation/client/aliases.md new file mode 100644 index 00000000..e72597a5 --- /dev/null +++ b/documentation/client/aliases.md @@ -0,0 +1,809 @@ +[Back to index](README.md) + +# ALIAS API + +## Overview + +The **AliasApi** trait provides comprehensive alias management functionality for Elasticsearch indices, enabling flexible index naming strategies, zero-downtime deployments, and index versioning patterns. + +**Features:** +- Add/remove aliases to indices +- Atomic alias swapping for zero-downtime deployments +- Alias existence checking +- Retrieve all aliases for an index +- Full parameter validation +- Support for multi-index aliases + +**Dependencies:** +- Requires `IndicesApi` for index existence validation + +--- + +## Alias Naming Rules + +Aliases follow the same naming conventions as indices: + +- **Lowercase only** +- **No special characters:** `\`, `/`, `*`, `?`, `"`, `<`, `>`, `|`, space, comma, `#` +- **Cannot start with:** `-`, `_`, `+` +- **Cannot be:** `.` or `..` +- **Maximum length:** 255 characters + +--- + +## Public Methods + +### addAlias + +Adds an alias to an existing index. 
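+
+As a quick illustration of the naming rules listed above, a client-side pre-check can reject bad alias names before any network call. The `isValidAliasName` helper below is a hypothetical sketch, not part of the API; the client performs its own validation as well:
+
+```scala
+// Hypothetical pre-check mirroring the alias naming rules above
+def isValidAliasName(alias: String): Boolean = {
+  val forbiddenChars = Set('\\', '/', '*', '?', '"', '<', '>', '|', ' ', ',', '#')
+  alias.nonEmpty &&
+  alias.length <= 255 &&
+  alias == alias.toLowerCase &&
+  !alias.exists(forbiddenChars.contains) &&
+  !Set('-', '_', '+').contains(alias.head) &&
+  alias != "." && alias != ".."
+}
+
+// Usage: only call addAlias with a name that passes the pre-check
+if (isValidAliasName("products-current")) {
+  client.addAlias("products-2024-01", "products-current")
+}
+```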
+ +**Signature:** + +```scala +def addAlias(index: String, alias: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - The index name to add the alias to +- `alias` - The alias name to create + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if alias added successfully +- `ElasticFailure` with error details (400 for validation, 404 if index not found) + +**Validation:** +- Index name format validation +- Alias name format validation +- Index and alias cannot have the same name +- Index must exist before adding alias + +**Behavior:** +- An alias can point to multiple indices (useful for searches across versions) +- An index can have multiple aliases +- Adding an existing alias is idempotent (no error) + +**Examples:** + +```scala +// Basic alias creation +client.addAlias("products-2024-01", "products-current") match { + case ElasticSuccess(_) => println("Alias added") + case ElasticFailure(error) => println(s"Error: ${error.message}") +} + +// Version management pattern +client.createIndex("users-v2") +client.addAlias("users-v2", "users-latest") +client.addAlias("users-v2", "users") + +// Multi-index alias for searching across versions +for { + _ <- client.addAlias("logs-2024-01", "logs-all") + _ <- client.addAlias("logs-2024-02", "logs-all") + _ <- client.addAlias("logs-2024-03", "logs-all") +} yield "Multi-index alias created" + +// Environment-specific aliases +def setupEnvironmentAliases(env: String): ElasticResult[Unit] = { + val indexName = s"products-$env" + for { + _ <- client.createIndex(indexName) + _ <- client.addAlias(indexName, "products-active") + _ <- client.addAlias(indexName, s"products-$env-current") + } yield () +} + +// Error handling +client.addAlias("products", "products") match { + case ElasticFailure(error) => + assert(error.message.contains("same name")) + assert(error.statusCode.contains(400)) +} + +client.addAlias("non-existent", "my-alias") match { + case ElasticFailure(error) => + assert(error.message.contains("does not exist")) + assert(error.statusCode.contains(404)) +} +``` + +--- + +### removeAlias + +Removes an alias from an index. 
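+
+Because Elasticsearch answers with a 404 when the alias is absent (see Behavior below), a common wrapper treats that case as an idempotent no-op. A minimal sketch using the `ElasticResult` shapes from this page; the wrapper name is illustrative:
+
+```scala
+// Treat "alias already absent" (404) as a successful no-op
+def removeAliasIdempotent(index: String, alias: String): ElasticResult[Boolean] =
+  client.removeAlias(index, alias) match {
+    case ElasticFailure(error) if error.statusCode.contains(404) =>
+      ElasticResult.success(false) // nothing was removed
+    case other => other
+  }
+```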
+ +**Signature:** + +```scala +def removeAlias(index: String, alias: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - The index name to remove the alias from +- `alias` - The alias name to remove + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if alias removed successfully +- `ElasticFailure` with error details (400 for validation, 404 if alias not found) + +**Validation:** +- Index name format validation +- Alias name format validation + +**Behavior:** +- If the alias does not exist, Elasticsearch returns a 404 error +- Removing an alias does not affect the underlying index +- If alias points to multiple indices, only removes from specified index + +**Examples:** + +```scala +// Simple removal +client.removeAlias("products-v1", "products-current") match { + case ElasticSuccess(_) => println("Alias removed") + case ElasticFailure(error) => println(s"Error: ${error.message}") +} + +// Cleanup old aliases +val oldAliases = List("temp-alias", "test-alias", "staging-alias") +oldAliases.foreach { alias => + client.removeAlias("my-index", alias) +} + +// Safe removal with existence check +def safeRemoveAlias(index: String, alias: String): ElasticResult[Boolean] = { + client.aliasExists(alias).flatMap { + case true => client.removeAlias(index, alias) + case false => ElasticResult.success(false) + } +} + +// Remove all aliases from an index +def removeAllAliases(index: String): ElasticResult[Unit] = { + for { + aliases <- client.getAliases(index) + _ <- aliases.foldLeft(ElasticResult.success(())) { (acc, alias) => + acc.flatMap(_ => client.removeAlias(index, alias).map(_ => ())) + } + } yield () +} + +// Deployment cleanup +def cleanupOldVersion(oldIndex: String, currentAlias: String): ElasticResult[Unit] = { + for { + _ <- client.removeAlias(oldIndex, currentAlias) + _ <- client.closeIndex(oldIndex) + } yield () +} +``` + +--- + +### aliasExists + +Checks whether an alias exists in the cluster. 
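+
+As a quick sketch before the details: since the check returns an `ElasticResult[Boolean]`, it composes nicely when deciding where to send a query. The helper name below is illustrative, not part of the API:
+
+```scala
+// Prefer the alias when it exists; otherwise fall back to a concrete index.
+// A lookup error also falls back, so reads keep working.
+def resolveSearchTarget(alias: String, fallbackIndex: String): String =
+  client.aliasExists(alias) match {
+    case ElasticSuccess(true) => alias
+    case _                    => fallbackIndex
+  }
+```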
+ +**Signature:** + +```scala +def aliasExists(alias: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `alias` - The alias name to check + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if alias exists, `false` otherwise +- `ElasticFailure` with error details (400 for validation errors) + +**Validation:** +- Alias name format validation + +**Examples:** + +```scala +// Simple existence check +client.aliasExists("products-current") match { + case ElasticSuccess(true) => println("Alias exists") + case ElasticSuccess(false) => println("Alias does not exist") + case ElasticFailure(error) => println(s"Error: ${error.message}") +} + +// Conditional alias creation +def ensureAliasExists(index: String, alias: String): ElasticResult[Boolean] = { + client.aliasExists(alias).flatMap { + case false => client.addAlias(index, alias) + case true => ElasticResult.success(true) + } +} + +// Pre-deployment validation +def validateDeploymentReady(newIndex: String, alias: String): ElasticResult[Boolean] = { + for { + indexExists <- client.indexExists(newIndex) + aliasExists <- client.aliasExists(alias) + } yield indexExists && aliasExists +} + +// Check multiple aliases +val requiredAliases = List("products", "products-current", "products-active") +val checks = requiredAliases.map { alias => + alias -> client.aliasExists(alias) +} + +checks.foreach { + case (alias, ElasticSuccess(true)) => println(s"✅ $alias exists") + case (alias, ElasticSuccess(false)) => println(s"❌ $alias missing") + case (alias, ElasticFailure(e)) => println(s"⚠️ Error checking $alias: ${e.message}") +} + +// Wait for alias creation +def waitForAlias(alias: String, maxAttempts: Int = 10): ElasticResult[Boolean] = { + def attempt(remaining: Int): ElasticResult[Boolean] = { + client.aliasExists(alias).flatMap { + case true => ElasticResult.success(true) + case false if remaining > 0 => + Thread.sleep(1000) + attempt(remaining - 1) + case false => + ElasticResult.failure(s"Alias $alias not found after $maxAttempts attempts") + } + } + attempt(maxAttempts) +} +``` + +--- + +### getAliases + +Retrieves all aliases associated with an index. 
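+
+A reconciliation sketch building on this method: compare the actual aliases with an expected set, then add what is missing and remove what is extra. It reuses the chained-`foldLeft` pattern from the `removeAllAliases` example earlier on this page; the helper name is illustrative:
+
+```scala
+// Bring an index's aliases in line with an expected set
+def reconcileAliases(index: String, expected: Set[String]): ElasticResult[Unit] =
+  for {
+    actual <- client.getAliases(index)
+    _ <- (expected -- actual).foldLeft(ElasticResult.success(())) { (acc, alias) =>
+           acc.flatMap(_ => client.addAlias(index, alias).map(_ => ()))
+         }
+    _ <- (actual -- expected).foldLeft(ElasticResult.success(())) { (acc, alias) =>
+           acc.flatMap(_ => client.removeAlias(index, alias).map(_ => ()))
+         }
+  } yield ()
+```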
+ +**Signature:** + +```scala +def getAliases(index: String): ElasticResult[Set[String]] +``` + +**Parameters:** +- `index` - The index name to retrieve aliases for + +**Returns:** +- `ElasticSuccess[Set[String]]` containing all alias names (empty set if no aliases) +- `ElasticFailure` with error details (400 for validation errors) + +**Validation:** +- Index name format validation + +**Behavior:** +- Returns empty set if index has no aliases +- Returns empty set if index does not exist (with warning log) + +**Examples:** + +```scala +// Retrieve aliases +client.getAliases("products-v2") match { + case ElasticSuccess(aliases) => + println(s"Aliases: ${aliases.mkString(", ")}") + case ElasticFailure(error) => + println(s"Error: ${error.message}") +} + +// Check if index has specific alias +def hasAlias(index: String, alias: String): ElasticResult[Boolean] = { + client.getAliases(index).map(_.contains(alias)) +} + +// List all aliases for multiple indices +val indices = List("products-v1", "products-v2", "products-v3") +indices.foreach { index => + client.getAliases(index) match { + case ElasticSuccess(aliases) if aliases.nonEmpty => + println(s"$index: ${aliases.mkString(", ")}") + case ElasticSuccess(_) => + println(s"$index: no aliases") + case ElasticFailure(e) => + println(s"$index: error - ${e.message}") + } +} + +// Find all indices with a specific alias +def findIndicesForAlias( + indices: List[String], + targetAlias: String +): ElasticResult[List[String]] = { + val results = indices.map { index => + client.getAliases(index).map(aliases => (index, aliases)) + } + + ElasticResult.sequence(results).map { indexAliases => + indexAliases.filter(_._2.contains(targetAlias)).map(_._1) + } +} + +// Audit alias configuration +def auditAliases(indices: List[String]): Map[String, Set[String]] = { + indices.flatMap { index => + client.getAliases(index) match { + case ElasticSuccess(aliases) => Some(index -> aliases) + case ElasticFailure(_) => None + } + }.toMap +} + +// Verify expected aliases +def verifyAliases( + index: String, + expectedAliases: Set[String] +): ElasticResult[Boolean] = { + client.getAliases(index).map { actual => + actual == expectedAliases + } +} +``` + +--- + +### swapAlias + +Atomically swaps an alias from one index to another in a single operation. 
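+
+Conceptually, the swap is one `_aliases` request in which Elasticsearch applies a remove action and an add action atomically. A sketch of that request body as a Scala string, for illustration only — real implementations build it through the underlying client:
+
+```scala
+// Illustrative body of the single atomic POST /_aliases request
+// behind a swap; both actions succeed or fail together.
+val swapAliasBody: String =
+  """{
+    |  "actions": [
+    |    { "remove": { "index": "products-v1", "alias": "products" } },
+    |    { "add":    { "index": "products-v2", "alias": "products" } }
+    |  ]
+    |}""".stripMargin
+```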
+ +**Signature:** + +```scala +def swapAlias( + oldIndex: String, + newIndex: String, + alias: String +): ElasticResult[Boolean] +``` + +**Parameters:** +- `oldIndex` - The current index pointed to by the alias +- `newIndex` - The new index that should receive the alias +- `alias` - The alias name to swap + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if swap succeeded +- `ElasticFailure` with error details (400 for validation errors) + +**Validation:** +- Old index name format validation +- New index name format validation +- Alias name format validation +- Old and new indices must be different + +**Behavior:** +- **Atomic operation:** Alias is removed from old index and added to new index in a single request +- No downtime period where alias doesn't exist +- Recommended for zero-downtime deployments +- If alias doesn't exist on old index, it's still added to new index + +**Examples:** + +```scala +// Zero-downtime deployment +client.swapAlias( + oldIndex = "products-v1", + newIndex = "products-v2", + alias = "products" +) match { + case ElasticSuccess(_) => + println("✅ Alias swapped, new version deployed") + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} + +// Complete deployment workflow +def deployNewVersion( + currentVersion: String, + newVersion: String, + productionAlias: String +): ElasticResult[Unit] = { + for { + // Create new index + _ <- client.createIndex(newVersion) + + // Index data into new version + _ <- populateIndex(newVersion) + + // Verify data + _ <- verifyIndexData(newVersion) + + // Atomic swap + _ <- client.swapAlias(currentVersion, newVersion, productionAlias) + + // Cleanup old version + _ <- client.closeIndex(currentVersion) + } yield () +} + +// Blue-green deployment +def blueGreenDeploy(): ElasticResult[Unit] = { + val blue = "products-blue" + val green = "products-green" + val alias = "products" + + for { + // Determine current active index + aliases <- client.getAliases(blue) + (oldIndex, newIndex) = if (aliases.contains(alias)) { + (blue, green) + } else { + (green, blue) + } + + // Deploy to inactive index + _ <- client.createIndex(newIndex) + _ <- populateIndex(newIndex) + + // Swap alias + _ <- client.swapAlias(oldIndex, newIndex, alias) + } yield () +} + +// Rollback pattern +def rollback( + currentIndex: String, + previousIndex: String, + alias: String +): ElasticResult[Unit] = { + for { + _ <- client.swapAlias(currentIndex, previousIndex, alias) + _ = println(s"✅ Rolled back to $previousIndex") + } yield () +} + +// Multi-alias swap for multiple environments +def swapMultipleAliases( + oldIndex: String, + newIndex: String, + aliases: List[String] +): ElasticResult[List[Boolean]] = { + ElasticResult.sequence( + aliases.map(alias => client.swapAlias(oldIndex, newIndex, alias)) + ) +} + +// Safe swap with validation +def safeSwap( + oldIndex: String, + newIndex: String, + alias: String +): ElasticResult[Boolean] = { + for { + // Verify new index exists + newExists <- client.indexExists(newIndex) + _ <- if (!newExists) { + ElasticResult.failure(s"New index $newIndex does not exist") + } else { + ElasticResult.success(()) + } + + // Verify new index has data + // (implementation depends on your count method) + + // Perform swap + swapped <- client.swapAlias(oldIndex, newIndex, alias) + + // Verify alias now points to new index + aliases <- client.getAliases(newIndex) + _ <- if (aliases.contains(alias)) { + ElasticResult.success(()) + } else { + ElasticResult.failure("Alias swap verification failed") + } + } yield 
swapped +} + +// Error handling +client.swapAlias("products-v1", "products-v1", "products") match { + case ElasticFailure(error) => + assert(error.message.contains("cannot be the same")) + assert(error.statusCode.contains(400)) +} +``` + +--- + +## Implementation Requirements + +### executeAddAlias + +```scala +private[client] def executeAddAlias( + index: String, + alias: String +): ElasticResult[Boolean] +``` + +--- + +### executeRemoveAlias + +```scala +private[client] def executeRemoveAlias( + index: String, + alias: String +): ElasticResult[Boolean] +``` + +--- + +### executeAliasExists + +```scala +private[client] def executeAliasExists(alias: String): ElasticResult[Boolean] +``` + +--- + +### executeGetAliases + +```scala +private[client] def executeGetAliases(index: String): ElasticResult[String] +``` + +**Expected JSON Response Format:** + +```scala +{ + "my-index": { + "aliases": { + "alias1": {}, + "alias2": {}, + "alias3": {} + } + } +} +``` + +--- + +### executeSwapAlias + +```scala +private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String +): ElasticResult[Boolean] +``` + +**Implementation Note:** This should use Elasticsearch's bulk alias API to perform both remove and add operations atomically in a single request. + +--- + +## Error Handling + +**Invalid Alias Name:** + +```scala +client.addAlias("my-index", "INVALID ALIAS") match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.message.contains("Invalid alias")) +} +``` + +**Same Name for Index and Alias:** + +```scala +client.addAlias("products", "products") match { + case ElasticFailure(error) => + assert(error.message.contains("same name")) +} +``` + +**Index Not Found:** + +```scala +client.addAlias("non-existent-index", "my-alias") match { + case ElasticFailure(error) => + assert(error.statusCode.contains(404)) + assert(error.message.contains("does not exist")) +} +``` + +**Alias Not Found:** + +```scala +client.removeAlias("my-index", "non-existent-alias") match { + case ElasticFailure(error) => + // Elasticsearch returns 404 when alias doesn't exist + assert(error.statusCode.contains(404)) +} +``` + +--- + +## Common Patterns + +### Zero-Downtime Deployment + +```scala +def zeroDowntimeDeployment( + currentVersion: String, + newVersion: String, + productionAlias: String, + settings: String +): ElasticResult[Unit] = { + for { + // 1. Create new index + _ <- client.createIndex(newVersion, settings) + + // 2. Populate new index + _ <- reindexData(currentVersion, newVersion) + + // 3. Verify new index + verified <- verifyNewIndex(newVersion) + _ <- if (verified) ElasticResult.success(()) + else ElasticResult.failure("New index verification failed") + + // 4. Atomic swap + _ <- client.swapAlias(currentVersion, newVersion, productionAlias) + + // 5. Monitor for issues + _ = monitorHealth(newVersion) + + // 6. 
Cleanup old index after grace period + _ = scheduleCleanup(currentVersion) + } yield () +} +``` + +### Blue-Green Deployment + +```scala +object BlueGreenDeployment { + val BLUE = "products-blue" + val GREEN = "products-green" + val ALIAS = "products" + + def deploy(data: Seq[Document]): ElasticResult[Unit] = { + for { + // Find current active + blueAliases <- client.getAliases(BLUE) + (active, inactive) = if (blueAliases.contains(ALIAS)) { + (BLUE, GREEN) + } else { + (GREEN, BLUE) + } + + // Deploy to inactive + _ <- client.deleteIndex(inactive).recover(_ => false) + _ <- client.createIndex(inactive) + _ <- bulkIndex(inactive, data) + + // Swap + _ <- client.swapAlias(active, inactive, ALIAS) + } yield () + } + + def rollback(): ElasticResult[Unit] = { + for { + greenAliases <- client.getAliases(GREEN) + (current, previous) = if (greenAliases.contains(ALIAS)) { + (GREEN, BLUE) + } else { + (BLUE, GREEN) + } + _ <- client.swapAlias(current, previous, ALIAS) + } yield () + } +} +``` + +### Time-Based Index Management + +```scala +def setupMonthlyIndex(year: Int, month: Int): ElasticResult[Unit] = { + val indexName = f"logs-$year-$month%02d" + val currentAlias = "logs-current" + val allLogsAlias = "logs-all" + + for { + // Create new month's index + _ <- client.createIndex(indexName) + + // Add to "all logs" alias (multi-index) + _ <- client.addAlias(indexName, allLogsAlias) + + // Find previous current index + previousIndices <- findIndicesWithAlias(currentAlias) + + // Swap current alias to new month + _ <- previousIndices.headOption match { + case Some(prevIndex) => + client.swapAlias(prevIndex, indexName, currentAlias) + case None => + client.addAlias(indexName, currentAlias) + } + } yield () +} +``` + +### Alias-Based Read/Write Splitting + +```scala +object ReadWriteSplit { + val WRITE_ALIAS = "products-write" + val READ_ALIAS = "products-read" + + def setupSplit(activeIndex: String, replicaIndices: List[String]): ElasticResult[Unit] = { + for { + // Write alias points to single active index + _ <- client.addAlias(activeIndex, WRITE_ALIAS) + + // Read alias points to all indices + _ <- client.addAlias(activeIndex, READ_ALIAS) + _ <- replicaIndices.foldLeft(ElasticResult.success(())) { (acc, index) => + acc.flatMap(_ => client.addAlias(index, READ_ALIAS).map(_ => ())) + } + } yield () + } +} +``` + +--- + +## Best Practices + +**1. Always Use Aliases in Application Code** + +```scala +// ❌ Bad - hardcoded index names +client.search("products-v2", query) + +// ✅ Good - use aliases +client.search("products", query) +``` + +**2. Atomic Swaps for Deployments** + +```scala +// ❌ Bad - non-atomic, causes downtime +client.removeAlias(oldIndex, alias) +Thread.sleep(100) // ⚠️ Alias doesn't exist here! +client.addAlias(newIndex, alias) + +// ✅ Good - atomic operation +client.swapAlias(oldIndex, newIndex, alias) +``` + +**3. Verify Before Swapping** + +```scala +def verifiedSwap( + oldIndex: String, + newIndex: String, + alias: String +): ElasticResult[Boolean] = { + for { + // Verify new index is ready + exists <- client.indexExists(newIndex) + _ <- if (!exists) ElasticResult.failure("New index missing") + else ElasticResult.success(()) + + // Perform swap + swapped <- client.swapAlias(oldIndex, newIndex, alias) + } yield swapped +} +``` + +**4. 
Use Descriptive Alias Names** + +```scala +// ✅ Good alias naming +"products-current" // Active version +"products-read" // Read operations +"products-write" // Write operations +"products-all" // All versions +"products-staging" // Staging environment +``` + +--- + +[Back to index](README.md) | [Next: Mappings API](mappings.md) \ No newline at end of file diff --git a/documentation/client/bulk.md b/documentation/client/bulk.md new file mode 100644 index 00000000..d2f5ed22 --- /dev/null +++ b/documentation/client/bulk.md @@ -0,0 +1,1566 @@ +[Back to index](README.md) + +# BULK API + +## Overview + +The **BulkApi** trait provides high-performance bulk operations for Elasticsearch using Akka Streams, supporting indexing, updating, and deleting large volumes of documents with advanced features like automatic retry, progress tracking, and detailed result reporting. + +**Features:** +- **High-performance streaming** with Akka Streams +- **Automatic retry** with exponential backoff +- **Parallel processing** with configurable balance +- **Real-time progress tracking** and metrics +- **Detailed success/failure reporting** +- **Automatic index refresh management** +- **Date-based index suffixing** +- **Upsert and delete operations** +- **Configurable batch sizes** +- **Event callbacks** for monitoring + +**Dependencies:** +- Requires `RefreshApi` for index refresh operations +- Requires `SettingsApi` for index settings management +- Requires `IndexApi` for individual document operations (retry) +- Requires Akka Streams for reactive processing + +--- + +## Core Concepts + +### Bulk Operations Flow + +```scala +// Data flow pipeline +Iterator[D] + -> Transform to JSON + -> Create BulkItem + -> Apply settings (refresh, replicas) + -> Group into batches + -> Execute bulk requests (parallel) + -> Extract results + -> Retry failures (automatic) + -> Return Either[Failed, Success] +``` + +### Operation Types + +| Operation | Action | Behavior | +|------------|----------------|-------------------------------------------| +| **INDEX** | Insert/Replace | Creates or replaces entire document | +| **UPDATE** | Upsert | Updates existing or creates new (partial) | +| **DELETE** | Remove | Deletes document by ID | + +### Result Types + +```scala +// Success result +Right(SuccessfulDocument(id = "doc-001", index = "products")) + +// Failure result +Left(FailedDocument( + id = "doc-001", + index = "products", + document = """{"name": "Product"}""", + error = "version_conflict_engine_exception", + retryable = true +)) +``` + +--- + +## Configuration + +### BulkOptions + +```scala +case class BulkOptions( + defaultIndex: String, // Base index name + maxBulkSize: Int = 1000, // Documents per batch + balance: Int = 4, // Parallel workers + disableRefresh: Boolean = false, // Disable auto-refresh + retryOnFailure: Boolean = true, // Enable auto-retry + maxRetries: Int = 3, // Max retry attempts + retryDelay: FiniteDuration = 1.second, // Initial retry delay + retryBackoffMultiplier: Double = 2.0, // Backoff multiplier + logEvery: Int = 10 // Log progress every N batches +) + +// Usage +implicit val bulkOptions = BulkOptions( + defaultIndex = "products", + maxBulkSize = 5000, + balance = 8, + retryOnFailure = true +) +``` + +### BulkCallbacks + +```scala +case class BulkCallbacks( + onSuccess: (String, String) => Unit = (_, _) => (), + onFailure: FailedDocument => Unit = _ => (), + onComplete: BulkResult => Unit = _ => (), + onBatchComplete: (Int, BulkMetrics) => Unit = (_, _) => {} +) + +// Custom callbacks +val 
callbacks = BulkCallbacks( + onSuccess = (id, index) => println(s"✅ Indexed: $id in $index"), + onFailure = failed => println(s"❌ Failed: ${failed.id} - ${failed.error}"), + onComplete = result => + println( + s"📊 Bulk completed: ${result.successCount} successes, ${result.failedCount} failures " + + s"in ${result.metrics.durationMs}ms (${result.metrics.throughput} docs/sec)" + ), + onBatchComplete = (batchSize, metrics) => + println(s"📊 Batch completed: $batchSize docs (${metrics.throughput} docs/sec)") +) +``` + +--- + +## Public Methods + +### bulkWithResult + +Executes bulk operations with detailed success/failure reporting and metrics. + +**Signature:** + +```scala +def bulkWithResult[D]( + items: Iterator[D], + toDocument: D => String, + indexKey: Option[String] = None, + idKey: Option[String] = None, + suffixDateKey: Option[String] = None, + suffixDatePattern: Option[String] = None, + update: Option[Boolean] = None, + delete: Option[Boolean] = None, + parentIdKey: Option[String] = None, + callbacks: BulkCallbacks = BulkCallbacks.default +)(implicit bulkOptions: BulkOptions, system: ActorSystem): Future[BulkResult] +``` + +**Parameters:** +- `items` - Iterator of documents to process +- `toDocument` - Function to convert document to JSON string +- `indexKey` - Optional field name containing index name +- `idKey` - Optional field name containing document ID +- `suffixDateKey` - Optional date field for index suffix (e.g., "2024-01-15") +- `suffixDatePattern` - Date pattern for suffix formatting +- `update` - If true, performs upsert operation +- `delete` - If true, performs delete operation +- `parentIdKey` - Optional parent document ID field +- `callbacks` - Event callbacks for monitoring +- `bulkOptions` - Implicit bulk configuration +- `system` - Implicit ActorSystem for Akka Streams + +**Returns:** +- `Future[BulkResult]` with detailed success/failure information + +**BulkResult Structure:** + +```scala +case class BulkResult( + successCount: Int, // Number of successful operations + successIds: Set[String], // IDs of successful documents + failedCount: Int, // Number of failed operations + failedDocuments: Seq[FailedDocument], // Failed documents with errors + indices: Set[String], // All indices affected + metrics: BulkMetrics // Performance metrics +) + +case class BulkMetrics( + startTime: Long = System.currentTimeMillis(), + endTime: Option[Long] = None, + totalBatches: Int = 0, + totalDocuments: Int = 0, + failuresByStatus: Map[Int, Int] = Map.empty, + failuresByType: Map[String, Int] = Map.empty +) { + def durationMs: Long = endTime.getOrElse(System.currentTimeMillis()) - startTime + + def throughput: Double = // Documents per second + if (durationMs > 0) totalDocuments * 1000.0 / durationMs + else 0.0 + + def complete: BulkMetrics = copy(endTime = Some(System.currentTimeMillis())) + + def addFailure(error: BulkError): BulkMetrics = copy( + failuresByStatus = + failuresByStatus + (error.status -> (failuresByStatus.getOrElse(error.status, 0) + 1)), + failuresByType = + failuresByType + (error.`type` -> (failuresByType.getOrElse(error.`type`, 0) + 1)) + ) +} +``` + +**Examples:** + +```scala +import akka.actor.ActorSystem +import scala.concurrent.ExecutionContext.Implicits.global + +implicit val system: ActorSystem = ActorSystem("bulk-system") +implicit val bulkOptions: BulkOptions = BulkOptions( + defaultIndex = "products", + maxBulkSize = 1000, + balance = 4 +) + +// Domain model +case class Product(id: String, name: String, price: Double, category: String) + +// Basic bulk 
indexing +val products: Iterator[Product] = getProducts() // Large dataset + +val toJson: Product => String = product => s""" +{ + "id": "${product.id}", + "name": "${product.name}", + "price": ${product.price}, + "category": "${product.category}" +} +""" + +val resultFuture: Future[BulkResult] = client.bulkWithResult( + items = products, + toDocument = toJson, + idKey = Some("id") +) + +resultFuture.foreach { result => + println(s"✅ Success: ${result.successCount}") + println(s"❌ Failed: ${result.failedCount}") + println(s"📊 Throughput: ${result.metrics.throughput} docs/sec") + + // Handle failures + result.failedDocuments.foreach { failed => + println(s"Failed ID: ${failed.id}, Error: ${failed.error}") + } +} + +// With callbacks for real-time monitoring +val callbacks = BulkCallbacks( + onSuccess = (id, index) => + logger.info(s"✅ Indexed document $id in $index"), + + onFailure = failed => + logger.error(s"❌ Failed to index ${failed.id}: ${failed.error}"), + + onComplete = result => { + logger.info(s""" + |Bulk operation completed: + | - Success: ${result.successCount} + | - Failed: ${result.failedCount} + | - Duration: ${result.metrics.durationMs}ms + | - Throughput: ${result.metrics.throughput} docs/sec + """.stripMargin) + } +) + +client.bulkWithResult( + items = products, + toDocument = toJson, + idKey = Some("id"), + callbacks = callbacks +) + +// Bulk update (upsert) +client.bulkWithResult( + items = productUpdates, + toDocument = toJson, + idKey = Some("id"), + update = Some(true) // Upsert mode +).foreach { result => + println(s"Updated ${result.successCount} products") +} + +// Bulk delete +val idsToDelete: Iterator[String] = getObsoleteProductIds() + +client.bulkWithResult( + items = idsToDelete.map(id => Map("id" -> id)), + toDocument = doc => s"""{"id": "${doc("id")}"}""", + idKey = Some("id"), + delete = Some(true) +) + +// Date-based index suffixing +case class LogEntry(id: String, message: String, timestamp: String) + +val logs: Iterator[LogEntry] = getLogEntries() + +client.bulkWithResult( + items = logs, + toDocument = log => s""" + { + "id": "${log.id}", + "message": "${log.message}", + "timestamp": "${log.timestamp}" + } + """, + idKey = Some("id"), + suffixDateKey = Some("timestamp"), // Field containing date + suffixDatePattern = Some("yyyy-MM-dd") // Pattern for suffix +)( + bulkOptions.copy(defaultIndex = "logs"), // Base index: "logs-2024-01-15" + system +) + +// Error handling and retry analysis +resultFuture.foreach { result => + if (result.failedCount > 0) { + // Group errors by type + val errorsByType = result.failedDocuments + .groupBy(_.error) + .mapValues(_.size) + + errorsByType.foreach { case (errorType, count) => + println(s"Error: $errorType - Count: $count") + } + + // Identify retryable failures + val retryable = result.failedDocuments.filter(_.retryable) + println(s"Retryable failures: ${retryable.size}") + } +} + +// Performance tuning example +implicit val highThroughputOptions: BulkOptions = BulkOptions( + defaultIndex = "products", + maxBulkSize = 5000, // Larger batches + balance = 8, // More parallel workers + disableRefresh = true, // Disable auto-refresh for speed + retryOnFailure = true, + maxRetries = 5 +) + +client.bulkWithResult( + items = largeDataset, + toDocument = toJson, + idKey = Some("id") +).foreach { result => + // Manual refresh after bulk + result.indices.foreach(client.refresh) + println(s"Bulk completed: ${result.metrics.throughput} docs/sec") +} +``` + +--- + +### bulkSource + +Returns an Akka Streams Source that emits 
real-time results for each document. + +**Signature:** + +```scala +def bulkSource[D]( + items: Iterator[D], + toDocument: D => String, + idKey: Option[String] = None, + suffixDateKey: Option[String] = None, + suffixDatePattern: Option[String] = None, + update: Option[Boolean] = None, + delete: Option[Boolean] = None, + parentIdKey: Option[String] = None +)(implicit + bulkOptions: BulkOptions, + system: ActorSystem +): Source[Either[FailedDocument, SuccessfulDocument], NotUsed] +``` + +**Parameters:** +- Same as `bulkWithResult` (except callbacks) + +**Returns:** +- `Source[Either[FailedDocument, SuccessfulDocument], NotUsed]` +- Emits `Right(SuccessfulDocument)` for success +- Emits `Left(FailedDocument)` for failure + +**Use Cases:** +- Custom stream processing +- Real-time monitoring +- Integration with other Akka Streams +- Custom error handling logic + +**Examples:** + +```scala +import akka.stream.scaladsl._ +import akka.NotUsed + +// Basic streaming with real-time results +val source: Source[Either[FailedDocument, SuccessfulDocument], NotUsed] = + client.bulkSource( + items = products, + toDocument = toJson, + idKey = Some("id") + ) + +// Process results in real-time +source.runWith(Sink.foreach { + case Right(success) => + println(s"✅ Success: ${success.id} in ${success.index}") + + case Left(failed) => + println(s"❌ Failed: ${failed.id} - ${failed.error}") +}) + +// Count successes and failures +source + .runWith(Sink.fold((0, 0)) { + case ((successCount, failCount), Right(_)) => + (successCount + 1, failCount) + case ((successCount, failCount), Left(_)) => + (successCount, failCount + 1) + }) + .foreach { case (success, failed) => + println(s"Results: $success success, $failed failed") + } + +// Filter only failures +source + .collect { case Left(failed) => failed } + .runWith(Sink.foreach { failed => + logger.error(s"Failed document: ${failed.id}") + }) + +// Custom retry logic +source + .mapAsync(1) { + case Right(success) => Future.successful(Right(success)) + + case Left(failed) if failed.retryable => + // Custom retry logic + retryDocument(failed).map { + case true => Right(SuccessfulDocument(failed.id, failed.index)) + case false => Left(failed) + } + + case Left(failed) => Future.successful(Left(failed)) + } + .runWith(Sink.ignore) + +// Integration with other streams +val csvSource: Source[String, NotUsed] = + FileIO.fromPath(Paths.get("products.csv")) + .via(Framing.delimiter(ByteString("\n"), 1024)) + .map(_.utf8String) + +csvSource + .map(parseCsvLine) + .grouped(1000) + .flatMapConcat { batch => + client.bulkSource( + items = batch.iterator, + toDocument = toJson, + idKey = Some("id") + ) + } + .runWith(Sink.foreach { + case Right(success) => println(s"✅ ${success.id}") + case Left(failed) => println(s"❌ ${failed.id}") + }) + +// Progress tracking +var processed = 0 +source + .map { result => + processed += 1 + if (processed % 1000 == 0) { + println(s"Processed: $processed documents") + } + result + } + .runWith(Sink.ignore) + +// Write failures to file +source + .collect { case Left(failed) => failed } + .map(failed => s"${failed.id},${failed.error}\n") + .map(ByteString(_)) + .runWith(FileIO.toPath(Paths.get("failures.csv"))) + +// Broadcast to multiple sinks +source + .alsoTo(Sink.foreach { + case Right(success) => metricsCollector.recordSuccess() + case Left(failed) => metricsCollector.recordFailure() + }) + .runWith(Sink.ignore) +``` + +--- + +### bulk (Deprecated) + +Legacy synchronous bulk method. 
**Use `bulkWithResult` instead.** + +**Signature:** + +```scala +@deprecated("Use bulkWithResult for better error handling") +def bulk[D]( + items: Iterator[D], + toDocument: D => String, + idKey: Option[String] = None, + suffixDateKey: Option[String] = None, + suffixDatePattern: Option[String] = None, + update: Option[Boolean] = None, + delete: Option[Boolean] = None, + parentIdKey: Option[String] = None +)(implicit bulkOptions: BulkOptions, system: ActorSystem): ElasticResult[BulkResult] +``` + +**Note:** This method blocks the current thread. Use `bulkWithResult` for non-blocking operations. + +--- + +## Implementation Requirements + +### toBulkElasticAction + +```scala +implicit private[client] def toBulkElasticAction( + a: BulkActionType +): BulkElasticAction +``` + +Converts internal `BulkActionType` to Elasticsearch-specific bulk action. + +--- + +### bulkFlow + +```scala +private[client] def bulkFlow(implicit + bulkOptions: BulkOptions, + system: ActorSystem +): Flow[Seq[BulkActionType], BulkResultType, NotUsed] +``` + +**Implementation Example:** + +```scala +private[client] def bulkFlow(implicit + bulkOptions: BulkOptions, + system: ActorSystem +): Flow[Seq[BulkActionType], BulkResultType, NotUsed] = { + + implicit val ec: ExecutionContext = system.dispatcher + + Flow[Seq[BulkActionType]] + .mapAsync(1) { actions => + val bulkRequest = new BulkRequest() + + actions.foreach { action => + val elasticAction = toBulkElasticAction(action) + bulkRequest.add(elasticAction) + } + + Future { + client.bulk(bulkRequest, RequestOptions.DEFAULT) + } + } +} +``` + +--- + +### extractBulkResults + +```scala +private[client] def extractBulkResults( + result: BulkResultType, + originalBatch: Seq[BulkItem] +): Seq[Either[FailedDocument, SuccessfulDocument]] +``` + +**Implementation Example:** + +```scala +private[client] def extractBulkResults( + result: BulkResponse, + originalBatch: Seq[BulkItem] +): Seq[Either[FailedDocument, SuccessfulDocument]] = { + + result.getItems.zip(originalBatch).map { case (item, original) => + if (item.isFailed) { + Left(FailedDocument( + id = original.id.getOrElse("unknown"), + index = original.index, + document = original.document, + error = item.getFailureMessage, + retryable = isRetryable(item.getFailure) + )) + } else { + Right(SuccessfulDocument( + id = item.getId, + index = item.getIndex + )) + } + } +} + +private def isRetryable(failure: BulkItemResponse.Failure): Boolean = { + val retryableErrors = Set( + "version_conflict_engine_exception", + "es_rejected_execution_exception", + "timeout_exception" + ) + retryableErrors.exists(failure.getMessage.contains) +} +``` + +--- + +### toBulkAction & actionToBulkItem + +```scala +private[client] def toBulkAction(bulkItem: BulkItem): BulkActionType + +private[client] def actionToBulkItem(action: BulkActionType): BulkItem +``` + +Bidirectional conversion between internal `BulkItem` and Elasticsearch-specific `BulkActionType`. 
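+
+No reference implementation is shown for these two conversions; below is a minimal sketch of
+`toBulkAction` for the REST high-level client (where `BulkActionType` is typically a
+`DocWriteRequest[_]`), assuming illustrative `BulkItem` fields (`action`, `index`, `id`,
+`document`) and a hypothetical `BulkAction` enumeration — not the actual API:
+
+```scala
+import org.elasticsearch.action.DocWriteRequest
+import org.elasticsearch.action.delete.DeleteRequest
+import org.elasticsearch.action.index.IndexRequest
+import org.elasticsearch.action.update.UpdateRequest
+import org.elasticsearch.common.xcontent.XContentType
+
+// Hypothetical sketch — field and enum names are illustrative
+private[client] def toBulkAction(bulkItem: BulkItem): DocWriteRequest[_] =
+  bulkItem.action match {
+    case BulkAction.DELETE =>
+      new DeleteRequest(bulkItem.index).id(bulkItem.id.getOrElse(""))
+    case BulkAction.UPDATE =>
+      new UpdateRequest(bulkItem.index, bulkItem.id.getOrElse(""))
+        .doc(bulkItem.document, XContentType.JSON)
+        .docAsUpsert(true) // upsert semantics, as described above
+    case BulkAction.INDEX =>
+      val request = new IndexRequest(bulkItem.index)
+        .source(bulkItem.document, XContentType.JSON)
+      bulkItem.id.fold(request)(request.id)
+  }
+```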
+ +--- + +## Common Patterns + +### High-Throughput Indexing + +```scala +// Optimize for maximum throughput +implicit val highPerformanceOptions: BulkOptions = BulkOptions( + defaultIndex = "products", + maxBulkSize = 10000, // Large batches + balance = 16, // Many parallel workers + disableRefresh = true, // No refresh during bulk + retryOnFailure = false, // Skip retry for speed + logEvery = 50 // Less frequent logging +) + +val result = client.bulkWithResult( + items = massiveDataset, + toDocument = toJson, + idKey = Some("id") +) + +result.foreach { r => + // Manual refresh once at the end + r.indices.foreach(client.refresh) + println(s"Indexed ${r.successCount} documents at ${r.metrics.throughput} docs/sec") +} +``` + +### Reliable Indexing with Retry + +```scala +// Optimize for reliability +implicit val reliableOptions: BulkOptions = BulkOptions( + defaultIndex = "critical-data", + maxBulkSize = 500, // Smaller batches + balance = 2, // Conservative parallelism + disableRefresh = false, // Auto-refresh + retryOnFailure = true, // Enable retry + maxRetries = 5, // More retry attempts + retryDelay = 2.seconds, // Longer initial delay + retryBackoffMultiplier = 3.0 +) + +val result = client.bulkWithResult( + items = criticalData, + toDocument = toJson, + idKey = Some("id") +) + +result.foreach { r => + if (r.failedCount > 0) { + // Log all failures for investigation + r.failedDocuments.foreach { failed => + logger.error(s"Critical failure: ${failed.id} - ${failed.error}") + alerting.sendAlert(s"Failed to index critical document: ${failed.id}") + } + } +} +``` + +### Time-Series Data with Date Suffixes + +```scala +case class LogEntry( + id: String, + timestamp: String, // ISO format: "2024-01-15T10:30:00Z" + level: String, + message: String +) + +val logs: Iterator[LogEntry] = streamLogs() + +implicit val logOptions: BulkOptions = BulkOptions( + defaultIndex = "logs", // Base index + maxBulkSize = 2000, + balance = 4 +) + +client.bulkWithResult( + items = logs, + toDocument = log => s""" + { + "id": "${log.id}", + "timestamp": "${log.timestamp}", + "level": "${log.level}", + "message": "${log.message}" + } + """, + idKey = Some("id"), + suffixDateKey = Some("timestamp"), + suffixDatePattern = Some("yyyy-MM-dd") +) +// Creates indices: logs-2024-01-15, logs-2024-01-16, etc. 
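+// (Assumption: suffixDatePattern follows java.time DateTimeFormatter syntax, as
+// "yyyy-MM-dd" suggests — e.g. Some("yyyy-MM") would yield monthly indices such
+// as logs-2024-01 instead of daily ones.)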
+```
+
+### Incremental Updates
+
+```scala
+case class ProductUpdate(id: String, price: Double, stock: Int)
+
+val updates: Iterator[ProductUpdate] = getProductUpdates()
+
+client.bulkWithResult(
+  items = updates,
+  toDocument = update => s"""
+  {
+    "id": "${update.id}",
+    "price": ${update.price},
+    "stock": ${update.stock}
+  }
+  """,
+  idKey = Some("id"),
+  update = Some(true) // Upsert mode
+).foreach { result =>
+  println(s"Updated ${result.successCount} products")
+}
+```
+
+### Batch Deletion
+
+```scala
+val obsoleteIds: Iterator[String] = findObsoleteDocuments()
+
+client.bulkWithResult(
+  items = obsoleteIds.map(id => Map("id" -> id)),
+  toDocument = doc => s"""{"id": "${doc("id")}"}""",
+  idKey = Some("id"),
+  delete = Some(true)
+).foreach { result =>
+  println(s"Deleted ${result.successCount} documents")
+}
+```
+
+---
+
+## Performance Optimization
+
+### Tuning Parameters
+
+| Parameter        | Low Throughput | Balanced | High Throughput |
+|------------------|----------------|----------|-----------------|
+| `maxBulkSize`    | 500            | 1000     | 5000-10000      |
+| `balance`        | 1-2            | 4        | 8-16            |
+| `disableRefresh` | false          | false    | true            |
+| `retryOnFailure` | true           | true     | false           |
+
+### Memory Considerations
+
+```scala
+// For large documents, use smaller batches
+implicit val largeDocOptions: BulkOptions = BulkOptions(
+  defaultIndex = "documents",
+  maxBulkSize = 100, // Fewer large documents per batch
+  balance = 2
+)
+
+// For small documents, use larger batches
+implicit val smallDocOptions: BulkOptions = BulkOptions(
+  defaultIndex = "events",
+  maxBulkSize = 10000, // Many small documents per batch
+  balance = 8
+)
+```
+
+### Backpressure Handling
+
+```scala
+// Akka Streams automatically handles backpressure
+val source = client.bulkSource(
+  items = infiniteStream,
+  toDocument = toJson,
+  idKey = Some("id")
+)
+
+// Add throttling if needed
+source
+  .throttle(1000, 1.second) // Max 1000 docs/sec
+  .runWith(Sink.foreach {
+    case Right(success) => println(s"✅ ${success.id}")
+    case Left(failed)   => println(s"❌ ${failed.id}")
+  })
+```
+
+---
+
+## Error Handling
+
+### Retryable vs Non-Retryable Errors
+
+```scala
+// Retryable errors (automatic retry)
+val retryableErrors = Set(
+  "version_conflict_engine_exception", // Concurrent modification
+  "es_rejected_execution_exception",   // Queue full
+  "timeout_exception",                 // Temporary timeout
+  "connect_exception"                  // Network issue
+)
+
+// Non-retryable errors (fail immediately)
+val nonRetryableErrors = Set(
+  "mapper_parsing_exception",   // Invalid document structure
+  "illegal_argument_exception", // Invalid field value
+  "index_not_found_exception"   // Missing index
+)
+```
+
+### Handling Failures
+
+```scala
+val result = client.bulkWithResult(
+  items = products,
+  toDocument = toJson,
+  idKey = Some("id")
+)
+
+result.foreach { r =>
+  if (r.failedCount > 0) {
+    // Group by error type
+    val errorGroups = r.failedDocuments.groupBy(_.error)
+
+    errorGroups.foreach { case (errorType, failures) =>
+      println(s"Error: $errorType")
+      println(s"Count: ${failures.size}")
+
+      // Handle specific error types
+      errorType match {
+        case e if e.contains("mapper_parsing") =>
+          // Log invalid documents for review
+          failures.foreach { f =>
+            logger.error(s"Invalid document: ${f.document}")
+          }
+
+        case e if e.contains("version_conflict") =>
+          // Retry with latest version
+          failures.foreach { f =>
+            retryWithFreshVersion(f.id)
+          }
+
+        case _ =>
+          logger.error(s"Unhandled error: $errorType")
+      }
+    }
+  }
+}
+```
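+
+A natural follow-up is to route failures by their `retryable` flag. A minimal sketch,
+assuming application-side `retryQueue` and `deadLetterQueue` components (not part of
+this API; the same pattern appears in the best practices below):
+
+```scala
+// Transient failures go back to a retry queue; permanent ones to a
+// dead letter queue for manual inspection
+result.foreach { r =>
+  val (transient, permanent) = r.failedDocuments.partition(_.retryable)
+  transient.foreach(retryQueue.add)
+  permanent.foreach(deadLetterQueue.add)
+}
+```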
+
+---
+
+## Monitoring and Metrics
+
+### Real-Time Progress Tracking
+
+```scala
+val callbacks = BulkCallbacks(
+  onSuccess = (id, index) => {
+    metricsCollector.incrementSuccess()
+  },
+
+  onFailure = failed => {
+    metricsCollector.incrementFailure(failed.error)
+  },
+
+  onComplete = result => {
+    val metrics = result.metrics
+    logger.info(s"""
+      |Bulk Operation Summary:
+      |  Duration: ${metrics.durationMs}ms
+      |  Total Documents: ${metrics.totalDocuments}
+      |  Success: ${result.successCount}
+      |  Failed: ${result.failedCount}
+      |  Throughput: ${metrics.throughput} docs/sec
+      |  Batches: ${metrics.totalBatches}
+      |  Indices: ${result.indices.mkString(", ")}
+    """.stripMargin)
+
+    // Failure breakdown
+    metrics.failuresByType.foreach { case (errorType, count) =>
+      logger.info(s"  $errorType: $count")
+    }
+  }
+)
+
+client.bulkWithResult(
+  items = products,
+  toDocument = toJson,
+  idKey = Some("id"),
+  callbacks = callbacks
+)
+```
+
+### Custom Metrics Collection
+
+```scala
+var successCount = 0
+var failureCount = 0
+val startTime = System.currentTimeMillis()
+
+client.bulkSource(
+  items = products,
+  toDocument = toJson,
+  idKey = Some("id")
+).runWith(Sink.foreach {
+  case Right(_) =>
+    successCount += 1
+    if (successCount % 1000 == 0) {
+      val elapsed = System.currentTimeMillis() - startTime
+      val throughput = (successCount * 1000.0) / elapsed
+      println(s"Progress: $successCount docs, $throughput docs/sec")
+    }
+
+  case Left(_) =>
+    failureCount += 1
+})
+```
+
+---
+
+## Best Practices
+
+**1. Choose Appropriate Batch Sizes**
+
+```scala
+// ✅ Good - balanced batch size
+implicit val options = BulkOptions(
+  defaultIndex = "products",
+  maxBulkSize = 1000 // Good for most use cases
+)
+
+// ❌ Too small - overhead
+implicit val tooSmall = BulkOptions(maxBulkSize = 10)
+
+// ❌ Too large - memory issues
+implicit val tooLarge = BulkOptions(maxBulkSize = 100000)
+```
+
+**2. Disable Refresh for Large Bulks**
+
+```scala
+// ✅ Good - disable refresh during bulk
+implicit val options = BulkOptions(
+  defaultIndex = "products",
+  disableRefresh = true
+)
+
+val result = client.bulkWithResult(items, toJson, idKey = Some("id"))
+result.foreach { r =>
+  // Manual refresh once at the end
+  r.indices.foreach(client.refresh)
+}
+```
+
+**3. Handle Failures Appropriately**
+
+```scala
+// ✅ Good - detailed failure handling
+result.foreach { r =>
+  r.failedDocuments.foreach { failed =>
+    if (failed.retryable) {
+      retryQueue.add(failed)
+    } else {
+      deadLetterQueue.add(failed)
+    }
+  }
+}
+
+// ❌ Avoid - ignoring failures
+result.foreach { r =>
+  println(s"Success: ${r.successCount}")
+  // Failures ignored!
+}
+```
+
+**4. Use Callbacks for Monitoring**
+
+```scala
+// ✅ Good - real-time monitoring
+val callbacks = BulkCallbacks(
+  onSuccess = (id, index) => recordSuccess(id, index),
+  onFailure = failed => recordFailure(failed.error),
+  onComplete = result => sendCompletionNotification(result)
+)
+
+client.bulkWithResult(items, toJson, idKey = Some("id"), callbacks = callbacks)
+```
+
+**5. 
Tune Parallelism Based on Cluster Size** + +```scala +// Small cluster (1-3 nodes) +implicit val smallCluster = BulkOptions(balance = 2) + +// Medium cluster (4-10 nodes) +implicit val mediumCluster = BulkOptions(balance = 4) + +// Large cluster (10+ nodes) +implicit val largeCluster = BulkOptions(balance = 8) +``` + +--- + +## Testing Scenarios + +### Test Basic Bulk Indexing + +```scala +def testBulkIndexing()(implicit system: ActorSystem): Future[Unit] = { + implicit val bulkOptions: BulkOptions = BulkOptions( + defaultIndex = "test-bulk", + maxBulkSize = 100 + ) + + val testData = (1 to 1000).map { i => + Map("id" -> s"doc-$i", "name" -> s"Product $i", "price" -> (i * 10.0)) + } + + val toJson: Map[String, Any] => String = doc => s""" + { + "id": "${doc("id")}", + "name": "${doc("name")}", + "price": ${doc("price")} + } + """ + + client.bulkWithResult( + items = testData.iterator, + toDocument = toJson, + idKey = Some("id") + ).map { result => + assert(result.successCount == 1000, "All documents should be indexed") + assert(result.failedCount == 0, "No failures expected") + assert(result.indices.contains("test-bulk"), "Index should be created") + + println(s"✅ Bulk test passed: ${result.successCount} documents indexed") + } +} +``` + +### Test Bulk Update + +```scala +def testBulkUpdate()(implicit system: ActorSystem): Future[Unit] = { + implicit val bulkOptions: BulkOptions = BulkOptions(defaultIndex = "test-bulk") + + for { + // First, index documents + _ <- client.bulkWithResult( + items = testData.iterator, + toDocument = toJson, + idKey = Some("id") + ) + + // Then, update them + updates = testData.map(doc => doc.updated("price", 999.99)) + updateResult <- client.bulkWithResult( + items = updates.iterator, + toDocument = toJson, + idKey = Some("id"), + update = Some(true) + ) + + _ = assert(updateResult.successCount == testData.size, "All updates should succeed") + + // Verify updates + doc <- client.get("doc-1", "test-bulk") + _ = assert(doc.contains("999.99"), "Price should be updated") + } yield { + println("✅ Bulk update test passed") + } +} +``` + +### Test Bulk Delete + +```scala +def testBulkDelete()(implicit system: ActorSystem): Future[Unit] = { + implicit val bulkOptions: BulkOptions = BulkOptions(defaultIndex = "test-bulk") + + for { + // Index documents + _ <- client.bulkWithResult( + items = testData.iterator, + toDocument = toJson, + idKey = Some("id") + ) + + // Delete them + deleteResult <- client.bulkWithResult( + items = testData.iterator, + toDocument = toJson, + idKey = Some("id"), + delete = Some(true) + ) + + _ = assert(deleteResult.successCount == testData.size, "All deletes should succeed") + + // Verify deletion + exists <- client.exists("doc-1", "test-bulk") + _ = assert(!exists, "Document should be deleted") + } yield { + println("✅ Bulk delete test passed") + } +} +``` + +### Test Error Handling + +```scala +def testBulkErrorHandling()(implicit system: ActorSystem): Future[Unit] = { + implicit val bulkOptions: BulkOptions = BulkOptions( + defaultIndex = "test-bulk", + retryOnFailure = false // Disable retry for testing + ) + + val mixedData = Seq( + """{"id": "valid-1", "name": "Valid Product"}""", + """{"id": "invalid", "name": INVALID_JSON}""", // Invalid JSON + """{"id": "valid-2", "name": "Another Valid"}""" + ) + + client.bulkWithResult( + items = mixedData.iterator, + toDocument = identity, + idKey = Some("id") + ).map { result => + assert(result.successCount == 2, "Two valid documents should succeed") + assert(result.failedCount == 1, "One 
invalid document should fail")
+
+    val failed = result.failedDocuments.head
+    assert(failed.id == "invalid", "Failed document ID should match")
+    assert(failed.error.contains("parse"), "Error should mention parsing")
+
+    println("✅ Error handling test passed")
+  }
+}
+```
+
+### Test Date-Based Index Suffixing
+
+```scala
+def testDateSuffixing()(implicit system: ActorSystem): Future[Unit] = {
+  implicit val bulkOptions: BulkOptions = BulkOptions(defaultIndex = "logs")
+
+  val logs = Seq(
+    """{"id": "log-1", "timestamp": "2024-01-15T10:00:00Z", "message": "Log 1"}""",
+    """{"id": "log-2", "timestamp": "2024-01-16T10:00:00Z", "message": "Log 2"}""",
+    """{"id": "log-3", "timestamp": "2024-01-17T10:00:00Z", "message": "Log 3"}"""
+  )
+
+  client.bulkWithResult(
+    items = logs.iterator,
+    toDocument = identity,
+    idKey = Some("id"),
+    suffixDateKey = Some("timestamp"),
+    suffixDatePattern = Some("yyyy-MM-dd")
+  ).map { result =>
+    assert(result.successCount == 3, "All logs should be indexed")
+    assert(result.indices.contains("logs-2024-01-15"), "Index with date suffix should exist")
+    assert(result.indices.contains("logs-2024-01-16"), "Index with date suffix should exist")
+    assert(result.indices.contains("logs-2024-01-17"), "Index with date suffix should exist")
+    assert(result.indices.size == 3, "Three different indices should be created")
+
+    println("✅ Date suffixing test passed")
+  }
+}
+```
+
+### Test Retry Mechanism
+
+```scala
+def testRetryMechanism()(implicit system: ActorSystem): Future[Unit] = {
+  implicit val bulkOptions: BulkOptions = BulkOptions(
+    defaultIndex = "test-bulk",
+    retryOnFailure = true,
+    maxRetries = 3,
+    retryDelay = 100.millis
+  )
+
+  var attemptCount = 0
+
+  // Simulate transient failure
+  val mockData = Seq("""{"id": "doc-1", "name": "Test"}""")
+
+  client.bulkWithResult(
+    items = mockData.iterator,
+    toDocument = { doc =>
+      attemptCount += 1
+      if (attemptCount < 3) {
+        // Simulate transient error
+        throw new Exception("Simulated transient error")
+      }
+      doc
+    },
+    idKey = Some("id")
+  ).map { result =>
+    assert(result.successCount == 1, "Document should succeed after retry")
+    assert(attemptCount >= 2, "Should have retried at least once")
+
+    println(s"✅ Retry test passed (attempts: $attemptCount)")
+  }
+}
+```
+
+### Test Performance Metrics
+
+```scala
+def testPerformanceMetrics()(implicit system: ActorSystem): Future[Unit] = {
+  implicit val bulkOptions: BulkOptions = BulkOptions(
+    defaultIndex = "test-bulk",
+    maxBulkSize = 1000,
+    logEvery = 10
+  )
+
+  val largeDataset = (1 to 10000).map { i =>
+    s"""{"id": "doc-$i", "name": "Product $i"}"""
+  }
+
+  client.bulkWithResult(
+    items = largeDataset.iterator,
+    toDocument = identity,
+    idKey = Some("id")
+  ).map { result =>
+    val metrics = result.metrics
+
+    assert(metrics.totalDocuments == 10000, "Total documents should match")
+    assert(metrics.totalBatches == 10, "Should have 10 batches (1000 each)")
+    assert(metrics.throughput > 0, "Throughput should be calculated")
+    assert(metrics.durationMs > 0, "Duration should be recorded")
+
+    println(s"""
+      |✅ Performance test passed:
+      |  Documents: ${metrics.totalDocuments}
+      |  Batches: ${metrics.totalBatches}
+      |  Duration: ${metrics.durationMs}ms
+      |  Throughput: ${metrics.throughput} docs/sec
+    """.stripMargin)
+  }
+}
+```
+
+---
+
+## Advanced Use Cases
+
+### Multi-Index Bulk Operations
+
+```scala
+case class Document(id: String, index: String, data: String)
+
+val multiIndexDocs: Iterator[Document] = getDocuments()
+
+// Custom transformation to handle 
multiple indices +client.bulkWithResult( + items = multiIndexDocs, + toDocument = doc => s""" + { + "id": "${doc.id}", + "index": "${doc.index}", + "data": "${doc.data}" + } + """, + indexKey = Some("index"), // Dynamic index per document + idKey = Some("id") +)( + bulkOptions.copy(defaultIndex = "default"), // Fallback index + system +).foreach { result => + println(s"Indexed across ${result.indices.size} indices") + result.indices.foreach(idx => println(s" - $idx")) +} +``` + +### Conditional Bulk Operations + +```scala +def bulkWithCondition[D]( + items: Iterator[D], + toDocument: D => String, + condition: D => Boolean +)(implicit bulkOptions: BulkOptions, system: ActorSystem): Future[BulkResult] = { + + val filteredItems = items.filter(condition) + + client.bulkWithResult( + items = filteredItems, + toDocument = toDocument, + idKey = Some("id") + ) +} + +// Usage: Only index products with price > 0 +bulkWithCondition( + items = products, + toDocument = toJson, + condition = (p: Product) => p.price > 0 +) +``` + +### Bulk with Transformation Pipeline + +```scala +def bulkWithTransformation[D, T]( + items: Iterator[D], + transform: D => T, + toDocument: T => String +)(implicit bulkOptions: BulkOptions, system: ActorSystem): Future[BulkResult] = { + + val transformedItems = items.map(transform) + + client.bulkWithResult( + items = transformedItems, + toDocument = toDocument, + idKey = Some("id") + ) +} + +// Usage: Enrich products before indexing +case class EnrichedProduct( + id: String, + name: String, + price: Double, + category: String, + enrichedAt: String +) + +def enrichProduct(product: Product): EnrichedProduct = { + EnrichedProduct( + id = product.id, + name = product.name, + price = product.price, + category = categorize(product), + enrichedAt = java.time.Instant.now().toString + ) +} + +bulkWithTransformation( + items = products, + transform = enrichProduct, + toDocument = toJson +) +``` + +### Bulk with External API Integration + +```scala +def bulkWithExternalEnrichment[D]( + items: Iterator[D], + enrichmentApi: D => Future[D], + toDocument: D => String +)(implicit + bulkOptions: BulkOptions, + system: ActorSystem, + ec: ExecutionContext +): Future[BulkResult] = { + + // Enrich in batches to avoid overwhelming external API + val enrichedFuture = Future.sequence( + items.grouped(100).map { batch => + Future.sequence(batch.map(enrichmentApi)) + } + ).map(_.flatten) + + enrichedFuture.flatMap { enrichedItems => + client.bulkWithResult( + items = enrichedItems.iterator, + toDocument = toDocument, + idKey = Some("id") + ) + } +} +``` + +### Bulk with Deduplication + +```scala +def bulkWithDeduplication[D]( + items: Iterator[D], + getId: D => String, + toDocument: D => String +)(implicit bulkOptions: BulkOptions, system: ActorSystem): Future[BulkResult] = { + + val seen = scala.collection.mutable.Set[String]() + val dedupedItems = items.filter { item => + val id = getId(item) + if (seen.contains(id)) { + false + } else { + seen.add(id) + true + } + } + + client.bulkWithResult( + items = dedupedItems, + toDocument = toDocument, + idKey = Some("id") + ) +} +``` + +--- + +## Troubleshooting + +### Common Issues and Solutions + +**1. Out of Memory Errors** + +```scala +// Problem: Large batches causing OOM +implicit val problematic = BulkOptions(maxBulkSize = 100000) + +// Solution: Reduce batch size +implicit val fixed = BulkOptions(maxBulkSize = 1000) +``` + +**2. 
Slow Performance**
+
+```scala
+// Problem: Sequential processing
+implicit val slow = BulkOptions(balance = 1)
+
+// Solution: Increase parallelism
+implicit val fast = BulkOptions(
+  balance = 8,
+  maxBulkSize = 5000,
+  disableRefresh = true
+)
+```
+
+**3. Too Many Retries**
+
+```scala
+// Problem: Retrying non-retryable errors
+implicit val wasteful = BulkOptions(
+  retryOnFailure = true,
+  maxRetries = 10
+)
+
+// Solution: Identify and skip non-retryable errors
+result.foreach { r =>
+  r.failedDocuments.foreach { failed =>
+    if (!failed.retryable) {
+      deadLetterQueue.add(failed) // Don't retry
+    }
+  }
+}
+```
+
+**4. Index Refresh Issues**
+
+```scala
+// Problem: Slow indexing due to frequent refresh
+implicit val slow = BulkOptions(disableRefresh = false)
+
+// Solution: Disable refresh during bulk, refresh once at end
+implicit val fast = BulkOptions(disableRefresh = true)
+
+client.bulkWithResult(items, toJson, idKey = Some("id")).foreach { result =>
+  result.indices.foreach(client.refresh) // Manual refresh
+}
+```
+
+---
+
+## Comparison with Other Operations
+
+### Bulk vs Individual Operations
+
+| Aspect             | Individual       | Bulk                  |
+|--------------------|------------------|-----------------------|
+| **Performance**    | Slow (1 req/doc) | Fast (1000s docs/req) |
+| **Network**        | High overhead    | Minimal overhead      |
+| **Memory**         | Low              | Higher                |
+| **Error Handling** | Immediate        | Batched               |
+| **Use Case**       | Single documents | Large datasets        |
+
+```scala
+// Individual indexing (slow)
+products.foreach { product =>
+  client.index("products", product.id, toJson(product))
+}
+
+// Bulk indexing (fast)
+client.bulkWithResult(
+  items = products,
+  toDocument = toJson,
+  idKey = Some("id")
+)
+```
+
+---
+
+## Summary
+
+### Key Takeaways
+
+1. **Use bulk operations for large datasets** (> 100 documents)
+2. **Tune batch size** based on document size and memory
+3. **Disable refresh** during bulk, refresh once at end
+4. **Enable retry** for production reliability
+5. **Monitor metrics** for performance optimization
+6. **Handle failures** appropriately (retry vs dead letter queue)
+7. **Use callbacks** for real-time monitoring
+8. 
**Adjust parallelism** based on cluster size + +### Quick Reference + +```scala +// High-performance bulk indexing +implicit val options = BulkOptions( + defaultIndex = "products", + maxBulkSize = 5000, + balance = 8, + disableRefresh = true, + retryOnFailure = true, + maxRetries = 3 +) + +client.bulkWithResult( + items = products, + toDocument = toJson, + idKey = Some("id"), + callbacks = BulkCallbacks.logging(logger) +).foreach { result => + result.indices.foreach(client.refresh) + println(s"Indexed ${result.successCount} docs at ${result.metrics.throughput} docs/sec") +} +``` + +--- + +[Back to index](README.md) | [Next: Get Documents](get.md) \ No newline at end of file diff --git a/documentation/client/common_principles.md b/documentation/client/common_principles.md new file mode 100644 index 00000000..f8e621a0 --- /dev/null +++ b/documentation/client/common_principles.md @@ -0,0 +1,1051 @@ +[Back to index](README.md) + +# COMMON PRINCIPLES + +## Table of Contents + +- [Architecture Overview](common_principles.md#architecture-overview) +- [Service Provider Interface (SPI)](common_principles.md#service-provider-interface-spi) +- [Client Factory](common_principles.md#client-factory) +- [Decorator Pattern (Metrics & Monitoring)](common_principles.md#decorator-pattern-metrics--monitoring) +- [Configuration Management](common_principles.md#configuration-management) +- [Result Handling](common_principles.md#result-handling) +- [Error Model](common_principles.md#error-model) +- [Validation Helpers](common_principles.md#validation-helpers) +- [Execution Patterns](common_principles.md#execution-patterns) +- [Logging Conventions](common_principles.md#logging-conventions) + +--- + +## Architecture Overview + +### Trait Composition (Cake Pattern) + +The Elasticsearch client API is built using **trait composition**, allowing modular and extensible design: + +```scala +trait ElasticClientApi + extends IndicesApi + with SettingsApi + with AliasApi + with MappingApi + with CountApi + with SearchApi + // ... other APIs + with SerializationApi + with Closeable +``` + +**Benefits** : + +- ✅ Separation of concerns (each API is independent) +- ✅ Easy to test (mock individual traits) +- ✅ Flexible composition (choose which APIs to include) + +### Self-Type Annotations + +APIs use **self-type annotations** to declare dependencies: + +```scala +trait VersionApi extends ElasticClientHelpers { + _: SerializationApi => + // This trait requires SerializationApi to be mixed in + // ... +} +``` + +**Purpose** : + +- Ensures compile-time dependency validation +- Documents required traits explicitly +- Enables modular composition + +### Template Method Pattern + +Each API defines: + +1. **Public methods** - High-level interface for users +2. **Private implementation methods** - Client-specific logic (prefixed with `execute*`) + +```scala + // ======================================================================== + // PUBLIC METHODS + // ======================================================================== + + // Cache ES version (avoids calling it every time) + @volatile private var cachedVersion: Option[String] = None + + /** Get Elasticsearch version. + * @return + * the Elasticsearch version + */ + def version: ElasticResult[String] = { + //... 
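+    // A plausible sketch of the cached lookup (assumption — the actual body is elided):
+    //   cachedVersion match {
+    //     case Some(v) => ElasticResult.success(v)
+    //     case None    => executeVersion().map { v => cachedVersion = Some(v); v }
+    //   }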
+  }
+
+  // ========================================================================
+  // METHODS TO IMPLEMENT
+  // ========================================================================
+
+  private[client] def executeVersion(): ElasticResult[String]
+
+```
+
+**Implementations** :
+
+- `JestClientVersion` - Jest Client (ES 5-6)
+- `RestHighLevelClientVersion` - REST High Level Client (ES 6-7)
+- `JavaClientVersion` - Java Client (ES 8-9)
+
+---
+
+## Service Provider Interface (SPI)
+
+### ElasticClientSpi
+
+The library uses **Java's ServiceLoader mechanism** for pluggable client implementations:
+
+```scala
+trait ElasticClientSpi {
+  def client(conf: Config): ElasticClientApi
+}
+```
+
+**Benefits** :
+
+- ✅ Pluggable architecture - Add new clients without modifying core code
+- ✅ Loose coupling - Factory doesn't depend on concrete implementations
+- ✅ Runtime discovery - Clients loaded automatically via classpath
+
+### Implementation Example
+
+```scala
+class RestHighLevelClientSpi extends ElasticClientSpi {
+  override def client(config: Config): ElasticClientApi = {
+    new RestHighLevelClientApi(config)
+  }
+}
+```
+
+**Registration** (in META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi):
+
+- app.softnetwork.elastic.client.spi.JestClientSpi (`softclient4es6-jest-client`)
+- app.softnetwork.elastic.client.spi.RestHighLevelClientSpi (`softclient4es6-rest-client`, `softclient4es7-rest-client`)
+- app.softnetwork.elastic.client.spi.JavaClientSpi (`softclient4es8-java-client`, `softclient4es9-java-client`)
+
+---
+
+## Client Factory
+
+### ElasticClientFactory
+
+Central factory for creating and caching Elasticsearch clients:
+
+```scala
+object ElasticClientFactory {
+  def create(config: Config = ConfigFactory.load()): ElasticClientApi
+  def createWithMetrics(config: Config = ConfigFactory.load()): MetricsElasticClient
+  def createWithMonitoring(config: Config = ConfigFactory.load()): MonitoredElasticClient
+}
+```
+
+#### Example :
+
+```scala
+val config = ConfigFactory.load()
+
+// Client created according to configuration
+val client = ElasticClientFactory.create(config)
+
+// Normal usage
+client.createIndex("products")
+client.index("products", "123", """{"name": "Product"}""")
+
+// Access metrics if enabled
+client match {
+  case metricsClient: MetricsElasticClient =>
+    val metrics = metricsClient.getMetrics
+    println(s"Operations: ${metrics.totalOperations}")
+  case _ => println("Metrics not enabled")
+}
+```
+
+### Client Creation Modes
+
+1. Base Client (No Metrics)
+
+```scala
+val client = ElasticClientFactory.create(config)
+```
+
+2. Client with Metrics
+
+```scala
+val client = ElasticClientFactory.createWithMetrics(config)
+
+// Access metrics
+val metrics = client.getMetrics
+println(s"Total operations: ${metrics.totalOperations}")
+println(s"Success rate: ${metrics.successRate}%")
+```
+
+3. Client with Monitoring
+
+```scala
+val client = ElasticClientFactory.createWithMonitoring(config)
+
+// Automatic periodic reports every 30s:
+// === Elasticsearch Metrics ===
+// Total Operations: 150
+// Success Rate: 98.5%
+// Average Duration: 45ms
+// =============================
+
+// Automatic alerts when thresholds exceeded:
+// ⚠️ HIGH FAILURE RATE: 15.0%
+// ⚠️ HIGH LATENCY: 1200ms
+
+client.logMetrics()
+```
+
+4. Custom Metrics Collector
+
+```scala
+val sharedCollector = new MetricsCollector()
+
+// Multiple clients sharing same collector
+val client1 = ElasticClientFactory.createWithCustomMetrics(config, sharedCollector)
+val client2 = ElasticClientFactory.createWithCustomMetrics(config, sharedCollector)
+
+// Aggregated metrics across all clients
+val metrics = sharedCollector.getMetrics
+```
+
+```text
+=== Elasticsearch Metrics ===
+Total Operations: 106
+Success Rate: 98.11320754716981%
+Failure Rate: 1.8867924528301927%
+Average Duration: 35.735849056603776ms
+Min Duration: 2ms
+Max Duration: 223ms
+=============================
+```
+
+### Lifecycle Management
+
+#### Automatic Shutdown Hook
+
+```scala
+sys.addShutdownHook {
+  logger.info("JVM shutdown detected, closing all Elasticsearch clients")
+  ElasticClientFactory.shutdown()
+}
+```
+
+#### Manual Shutdown
+
+```scala
+// Shutdown all cached clients
+ElasticClientFactory.shutdown()
+
+// Clear cache without shutdown (testing)
+ElasticClientFactory.clearCache()
+
+// Cache statistics
+val stats = ElasticClientFactory.getCacheStats
+// Map("baseClients" -> 2, "metricsClients" -> 1, "monitoredClients" -> 1)
+```
+
+---
+
+## Decorator Pattern (Metrics & Monitoring)
+
+### Architecture
+
+The library uses the `Decorator Pattern` to add metrics and monitoring capabilities without modifying the base client:
+
+```text
+`ElasticClientApi` (base interface)
+        ↑
+        |
+`ElasticClientDelegator` (delegation helper)
+        ↑
+        |
+`MetricsElasticClient` (adds metrics)
+        ↑
+        |
+`MonitoredElasticClient` (adds monitoring + alerts)
+```
+
+### MetricsElasticClient
+
+Decorates any `ElasticClientApi` with metrics collection:
+
+```scala
+class MetricsElasticClient(
+  val delegate: ElasticClientApi,
+  val metricsCollector: MetricsCollector
+) extends ElasticClientDelegator with MetricsApi
+```
+
+#### Features:
+
+- ✅ Records operation duration
+- ✅ Tracks success/failure rates
+- ✅ Aggregates metrics by operation
+- ✅ Aggregates metrics by index
+- ✅ Thread-safe using AtomicLong
+
+#### Measurement Pattern:
+
+```scala
+private def measureResult[T](operation: String, index: Option[String])(
+  block: => ElasticResult[T]
+): ElasticResult[T] = {
+  val startTime = System.currentTimeMillis()
+  val result = block
+  val duration = System.currentTimeMillis() - startTime
+  metricsCollector.recordOperation(operation, duration, result.isSuccess, index)
+  result
+}
+```
+
+### MetricsCollector
+
+Thread-safe metrics accumulator using atomic operations:
+
+```scala
+class MetricsCollector {
+  private class MetricAccumulator {
+    val totalOps = new AtomicLong(0)
+    val successOps = new AtomicLong(0)
+    val failureOps = new AtomicLong(0)
+    val totalDuration = new AtomicLong(0)
+    val minDuration = new AtomicLong(Long.MaxValue)
+    val maxDuration = new AtomicLong(Long.MinValue)
+    val lastExecution = new AtomicLong(0)
+  }
+}
+```
+
+**Atomic updates** ensure thread-safety without locks:
+
+```scala
+totalOps.incrementAndGet()
+totalDuration.addAndGet(duration)
+minDuration.updateAndGet(current => Math.min(current, duration))
+```
+
+### MetricsApi
+
+#### Global Metrics
+
+```scala
+val metrics = client.getMetrics
+
+println(s"Total operations: ${metrics.totalOperations}")
+println(s"Success rate: ${metrics.successRate}%")
+println(s"Average duration: ${metrics.averageDuration}ms")
+println(s"Min/Max: ${metrics.minDuration}ms / ${metrics.maxDuration}ms")
+```
+
+#### Metrics by Operation
+
+```scala
+client.getMetricsByOperation("search").foreach { metrics =>
+  println(s"Search operations: ${metrics.totalOperations}")
+  println(s"Search avg latency: ${metrics.averageDuration}ms")
+
+  // Performance grading
+  val grade = metrics.averageDuration match {
+    case d if d < 100  => "Excellent"
+    case d if d < 500  => "Good"
+    case d if d < 1000 => "Average"
+    case _             => "Needs optimization"
+  }
+  println(s"Performance: $grade")
+}
+```
+
+#### Metrics by Index
+
+```scala
+client.getMetricsByIndex("products").foreach { metrics =>
+  println(s"Products index operations: ${metrics.totalOperations}")
+  println(s"Products index avg duration: ${metrics.averageDuration}ms")
+}
+
+// Compare index performance
+val productsPerf = client.getMetricsByIndex("products")
+  .map(_.averageDuration).getOrElse(0.0)
+val ordersPerf = client.getMetricsByIndex("orders")
+  .map(_.averageDuration).getOrElse(0.0)
+
+if (productsPerf > ordersPerf * 2) {
+  println("⚠️ Products index is significantly slower")
+}
+```
+
+#### Aggregated Metrics
+
+```scala
+val aggregated = client.getAggregatedMetrics
+
+println(s"=== Global ===")
+println(s"Total: ${aggregated.totalOperations} ops")
+println(s"Success rate: ${aggregated.successRate}%")
+
+println(s"=== By Operation ===")
+aggregated.operationMetrics.foreach { case (op, m) =>
+  println(s"$op: ${m.totalOperations} ops, ${m.averageDuration}ms avg")
+}
+
+println(s"=== By Index ===")
+aggregated.indexMetrics.foreach { case (idx, m) =>
+  println(s"$idx: ${m.totalOperations} ops, ${m.averageDuration}ms avg")
+}
+```
+
+#### Reset Metrics
+
+```scala
+// Useful for warmup phases or testing
+client.resetMetrics()
+```
+
+### MonitoredElasticClient
+
+Extends `MetricsElasticClient` with automatic monitoring and alerting:
+
+```scala
+class MonitoredElasticClient(
+  delegate: ElasticClientApi,
+  metricsCollector: MetricsCollector,
+  monitoringConfig: MonitoringConfig
+)(implicit system: ActorSystem)
+  extends MetricsElasticClient(delegate, metricsCollector)
+```
+
+#### Features
+
+- ✅ Periodic reports - Logs metrics at configured intervals
+- ✅ Automatic alerts - Warns when thresholds exceeded
+- ✅ Graceful shutdown - Logs final metrics before closing
+- ✅ Akka Scheduler - Non-blocking periodic execution
+
+#### Monitoring Loop:
+
+```scala
+system.scheduler.scheduleAtFixedRate(interval, interval) { () =>
+  logMetrics()  // Log current metrics
+  checkAlerts() // Check thresholds and alert
+}
+```
+
+#### Alert Conditions:
+
+```scala
+// High failure rate
+if (metrics.failureRate > failureRateThreshold) {
+  logger.warn(s"⚠️ HIGH FAILURE RATE: ${metrics.failureRate}%")
+}
+
+// High latency
+if (metrics.averageDuration > latencyThreshold) {
+  logger.warn(s"⚠️ HIGH LATENCY: ${metrics.averageDuration}ms")
+}
+```
+
+---
+
+## Configuration Management
+
+### Typesafe Config
+
+All configuration uses **Typesafe Config** (`HOCON` format):
+
+```hocon
+elastic {
+  # Connection settings
+  host = "localhost"
+  host = ${?ELASTIC_HOST}
+  port = 9200
+  port = ${?ELASTIC_PORT}
+
+  # Authentication
+  credentials {
+    url = "http://"${elastic.host}":"${elastic.port}
+    username = ""
+    password = ""
+    url = ${?ELASTIC_CREDENTIALS_URL}
+    username = ${?ELASTIC_CREDENTIALS_USERNAME}
+    password = ${?ELASTIC_CREDENTIALS_PASSWORD}
+  }
+
+  # Performance
+  multithreaded = true
+  connection-timeout = 5s
+  socket-timeout = 30s
+
+  # Cluster discovery
+  discovery {
+    enabled = false
+    frequency = 5m
+  }
+
+  # Metrics and Monitoring
+  metrics {
+    enabled = true
+    monitoring {
+      enabled = true
+      interval = 30s
+      failure-rate-threshold = 10.0 # Alert if > 10% failures
+      latency-threshold = 1000.0    # Alert if > 1000ms
+    }
+ } +} +``` + +### Environment Variable Overrides + +Configuration supports environment variable substitution : + +```shell +# Override host +export ELASTIC_HOST="prod-es-cluster.example.com" + +# Override credentials +export ELASTIC_CREDENTIALS_USERNAME="admin" +export ELASTIC_CREDENTIALS_PASSWORD="secret" + +# Override port +export ELASTIC_PORT=9243 +``` + +### Loading Configuration + +```scala +// Default (loads application.conf) +val config = ConfigFactory.load() +val elasticConfig = ElasticConfig(config) + +// Custom configuration file +val config = ConfigFactory.parseFile(new File("custom.conf")) + +// Programmatic configuration +val config = ConfigFactory.parseString(""" + elastic { + host = "localhost" + port = 9200 + metrics.enabled = false + } +""") +``` + +--- + +## Result Handling + +### ElasticResult[T] - Monadic ADT + +`ElasticResult[T]` is a functional wrapper for Elasticsearch operations, similar to `Try[T]` or `Either[E, T]`: + +```scala +sealed trait ElasticResult[+T] +case class ElasticSuccess[T](value: T) extends ElasticResult[T] +case class ElasticFailure(elasticError: ElasticError) extends ElasticResult[Nothing] +``` + +#### Monadic Operations + +```scala +val result: ElasticResult[String] = version + +// Transform success value +result.map(v => s"ES version: $v") + +// Chain operations +result.flatMap(v => anotherElasticOperation(v)) + +// Filter results +result.filter(v => v.startsWith("7."), "Unsupported version") +``` + +#### Extracting Values + +```scala +// Safe extraction +result.getOrElse("default-version") + +// Pattern matching +result.fold( + onFailure = error => s"Error: ${error.message}", + onSuccess = version => s"Version: $version" +) + +// Conversions +result.toOption // Option[T] +result.toEither // Either[ElasticError, T] +``` + +#### Side Effects + +```scala +// Execute on success +result.foreach(v => println(s"Version: $v")) + +// Check status +if (result.isSuccess) { /* ... */ } +if (result.isFailure) { /* ... */ } +``` + +### Utility Methods + +#### Creating results + +```scala +ElasticResult.success("7.17.0") +ElasticResult.failure("Connection failed") +ElasticResult.failure("Timeout", new TimeoutException()) +``` + +#### From Other Types + +```scala +// From Try +ElasticResult.fromTry(Try { /* operation */ }) + +// From Option +ElasticResult.fromOption(Some("value"), "Not found") + +// From Either +ElasticResult.fromEither(Right("value")) + +// From Future +ElasticResult.fromFuture(futureOperation) +``` + +#### Collection Operations + +```scala +// Sequence: List[ElasticResult[T]] => ElasticResult[List[T]] +val results: List[ElasticResult[String]] = List(...) 
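+// The aggregate can only succeed if every element succeeded, so any
+// ElasticFailure in the list turns the whole result into a failure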
+ElasticResult.sequence(results) + +// Traverse: Apply function to list +ElasticResult.traverse(indices)(index => getMapping(index)) +``` + +### Implicit Extensions + +#### Boolean Results + +```scala +val exists: ElasticResult[Boolean] = indexExists("my-index") + +exists.isTrue // true if ElasticSuccess(true) +exists.isFalse // true if ElasticSuccess(false) +exists.succeeded // true if successful (ignores value) +``` + +#### Logging + +```scala +result + .logSuccess(logger, v => s"Version retrieved: $v") + .logError(logger) +``` + +--- + +## Error Model + +### ElasticError Structure + +```scala +case class ElasticError( + message: String, // Human-readable error message + cause: Option[Throwable] = None, // Root exception + statusCode: Option[Int] = None, // HTTP status code + index: Option[String] = None, // Related index + operation: Option[String] = None // ES operation name +) +``` + +### Full Context Message + +```scala +val error = ElasticError( + message = "Index not found", + statusCode = Some(404), + index = Some("my-index"), + operation = Some("getMapping") +) + +error.fullMessage +// Output: "[getMapping] index=my-index status=404 Index not found" +``` + +### Logging Errors + +```scala +// Automatic logging with context +error.log(logger) + +// Logs with exception stacktrace if cause is present +``` + +### Common Status Codes + +| **Code** | **Meaning** | **Example** | +|-----------|-------------------|----------------------------------------| +| 200-299 | Success | Index created, document indexed | +| 400 | Bad Request | Invalid index name, malformed JSON | +| 404 | Not Found | Index/document doesn't exist | +| 409 | Conflict | Version conflict, index already exists | +| 429 | Too Many Requests | Rate limiting, circuit breaker | +| 500-599 | Server Error | ES cluster issue, node failure | + +--- + +## Validation Helpers + +### ElasticClientHelpers + +Provides common validation methods used across all APIs: + +#### Index Name Validation + +```scala +protected def validateIndexName(index: String): Option[ElasticError] +``` + +**Elasticsearch Rules** : + +- ✅ Not empty +- ✅ Lowercase only +- ✅ No characters: `\`, `/`, `*`, `?`, `"`, `<`, `>`, `|`, space, comma, `#` +- ✅ Does not start with `-`, `_`, `+` +- ✅ Is not `.` or `..` +- ✅ Maximum 255 characters + +**Usage** : + +```scala +validateIndexName("my-index") match { + case Some(error) => ElasticFailure(error) + case None => // proceed with operation +} +``` + +#### JSON Validation + +```scala +protected def validateJson( + operation: String, + jsonString: String +): Option[ElasticError] + +protected def validateJsonSettings(settings: String): Option[ElasticError] +``` + +**Validates** : + +- ✅ Non-empty +- ✅ No comments +- ✅ Valid JSON syntax (using json4s parser) + +#### Alias Name Validation + +```scala +protected def validateAliasName(alias: String): Option[ElasticError] +``` + +Aliases follow the same rules as index names. + +#### Error Logging + +```scala +protected def logError( + operation: String, + indexStr: String, + error: ElasticError +): Unit +``` + +**Logging Levels by Status Code** : + +- 404 → `DEBUG` (not always an error, e.g., indexExists) +- 400-499 → `WARN` (client error) +- 500-599 → `ERROR` (server error) +- Other → `ERROR` + +--- + +## Execution Patterns + +### REST High Level Client Helpers + +The `RestHighLevelClientHelpers` trait provides generic execution methods for all REST High Level Client operations. 
+
+#### Generic Execution
+
+```scala
+private[client] def executeRestAction[Req, Resp, T](
+  operation: String,
+  index: Option[String] = None,
+  retryable: Boolean = true
+)(
+  request: => Req
+)(
+  executor: Req => Resp
+)(
+  transformer: Resp => T
+): ElasticResult[T]
+```
+
+**Flow** :
+
+1. **Validation** (if needed, before calling)
+2. **Execution** - Wrapped in `Try` to catch exceptions
+3. **Error Handling** - Converts exceptions to `ElasticError`
+4. **Transformation** - Converts response to desired type `T`
+
+**Example** :
+
+```scala
+executeRestAction[CreateIndexRequest, CreateIndexResponse, Boolean](
+  operation = "createIndex",
+  index = Some("my-index")
+)(
+  request = new CreateIndexRequest("my-index")
+)(
+  executor = req => client.indices().create(req, RequestOptions.DEFAULT)
+)(
+  transformer = _.isAcknowledged
+)
+```
+
+#### Boolean Operations (Acknowledged)
+
+```scala
+private[client] def executeRestBooleanAction[Req, Resp <: AcknowledgedResponse](
+  operation: String,
+  index: Option[String] = None,
+  retryable: Boolean = true
+)(
+  request: => Req
+)(
+  executor: Req => Resp
+): ElasticResult[Boolean]
+```
+
+Simplified variant for operations returning `AcknowledgedResponse`.
+
+#### Low-Level REST Client
+
+```scala
+private[client] def executeRestLowLevelAction[T](
+  operation: String,
+  index: Option[String] = None,
+  retryable: Boolean = true
+)(
+  request: => Request
+)(
+  transformer: Response => T
+): ElasticResult[T]
+```
+
+Used for operations not available in the high-level client (e.g., `/_cat`, custom endpoints).
+
+**Example** :
+
+```scala
+executeRestLowLevelAction[String](
+  operation = "version"
+)(
+  request = new Request("GET", "/")
+)(
+  transformer = resp => {
+    val json = parse(EntityUtils.toString(resp.getEntity))
+    (json \\ "version" \\ "number").extract[String]
+  }
+)
+```
+
+#### Asynchronous Execution
+
+```scala
+private[client] def executeAsyncRestAction[Req, Resp, T](
+  operation: String,
+  index: Option[String] = None,
+  retryable: Boolean = true
+)(
+  request: => Req
+)(
+  executor: (Req, ActionListener[Resp]) => Unit
+)(
+  transformer: Resp => T
+)(implicit ec: ExecutionContext): Future[ElasticResult[T]]
+```
+
+Returns `Future[ElasticResult[T]]` for non-blocking operations.
+
+### Java Client Helpers
+
+The `JavaClientHelpers` trait provides similar generic execution methods for all Java Client operations.
+
+#### Generic Execution
+
+```scala
+private[client] def executeJavaAction[Resp, T](
+  operation: String,
+  index: Option[String] = None,
+  retryable: Boolean = true
+)(
+  action: => Resp
+)(
+  transformer: Resp => T
+): ElasticResult[T]
+```
+
+#### Boolean Operations
+
+```scala
+private[client] def executeJavaBooleanAction[Resp](
+  operation: String,
+  index: Option[String] = None,
+  retryable: Boolean = true
+)(
+  action: => Resp
+)(
+  acknowledgedExtractor: Resp => Boolean
+): ElasticResult[Boolean]
+```
+
+#### Asynchronous Execution
+
+```scala
+private[client] def executeAsyncJavaAction[Resp, T](
+  operation: String,
+  index: Option[String] = None,
+  retryable: Boolean = true
+)(
+  action: => Resp
+)(
+  transformer: Resp => T
+)(implicit ec: ExecutionContext): Future[ElasticResult[T]]
+```
+
+### Jest Client Helpers
+
+The `JestClientHelpers` trait provides similar generic execution methods for all Jest operations.
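+
+For example, a document deletion could be routed through `executeJestAction` (whose
+signature follows below) roughly as sketched here — assuming Jest's standard `Delete`
+action and `DocumentResult`; the exact wiring inside the client may differ:
+
+```scala
+import io.searchbox.core.{Delete, DocumentResult}
+
+// Build the Jest action lazily, execute it, then reduce the result to a Boolean
+executeJestAction[DocumentResult, Boolean](
+  operation = "delete",
+  index = Some("my-index")
+)(
+  action = new Delete.Builder("doc-001").index("my-index").`type`("_doc").build()
+)(
+  transformer = _.isSucceeded
+)
+```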
+ +#### Generic Execution + +```scala +private[client] def executeJestAction[R <: JestResult: ClassTag, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Action[R] + )( + transformer: R => T + ): ElasticResult[T] +``` + +#### Boolean Operations + +```scala +private[client] def executeJestBooleanAction[R <: JestResult: ClassTag]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Action[R] + ): ElasticResult[Boolean] +``` + +#### Asynchronous Execution + +```scala +private[client] def executeAsyncJestAction[R <: JestResult: ClassTag, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Action[R] + )( + transformer: R => T + )(implicit ec: ExecutionContext): Future[ElasticResult[T]] +``` + +### Exception Handling + +#### ElasticsearchException + +```scala +case Failure(ex: ElasticsearchException) => + val statusCode = Option(ex.status()).map(_.getStatus) + ElasticFailure(ElasticError( + message = ex.getDetailedMessage, + cause = Some(ex), + statusCode = statusCode + )) +``` + +#### ResponseException (Low-Level) + +```scala +case Failure(ex: ResponseException) => + val statusCode = Some(ex.getResponse.getStatusLine.getStatusCode) + ElasticFailure(ElasticError( + message = ex.getMessage, + cause = Some(ex), + statusCode = statusCode + )) +``` + +#### Generic Exceptions + +```scala +case Failure(ex) => + ElasticFailure(ElasticError( + message = ex.getMessage, + cause = Some(ex) + )) +``` + +--- + +## Logging Conventions + +### Log Levels + +| **Level** | **Usage** | **Example** | +|------------|------------------------------------|---------------------------------------------------| +| DEBUG | Operation start/end, 404 responses | "Executing operation 'version'" | +| INFO | Successful operations | "✅ Elasticsearch version: 7.17.0" | +| WARN | Client errors (4xx) | "Client error during 'createIndex': Invalid name" | +| ERROR | Server errors (5xx), exceptions | "❌ Failed to get version: Connection timeout" | + +### Log Format + +```scala +// Operation start +logger.debug(s"Executing operation '$operation'$indexStr") + +// Success +logger.info(s"✅ Operation '$operation'$indexStr succeeded") + +// Failure +logger.error(s"❌ Operation '$operation'$indexStr failed: ${error.message}") +``` + +### Emojis for Readability + +- ✅ Success +- ❌ Failure +- ⚠️ Warning/Alert + +--- + +[Back to index](README.md) \ No newline at end of file diff --git a/documentation/client/delete.md b/documentation/client/delete.md new file mode 100644 index 00000000..ee987924 --- /dev/null +++ b/documentation/client/delete.md @@ -0,0 +1,944 @@ +[Back to index](README.md) + +# DELETE API + +## Overview + +The **DeleteApi** trait provides functionality to delete documents from Elasticsearch indices, supporting both synchronous and asynchronous operations with comprehensive validation and error handling. 
+ +**Features:** +- Synchronous and asynchronous document deletion +- Automatic index refresh after deletion +- Index name validation +- Comprehensive error handling and logging +- Safe deletion with existence checking + +**Dependencies:** +- Requires `RefreshApi` for automatic refresh after deletion + +--- + +## Understanding Delete Operations + +**Delete Behavior:** +- Deletes a document by its ID from a specific index +- Returns `true` if document was deleted +- Returns `false` if document doesn't exist (not an error) +- Automatically refreshes index after successful deletion + +**Idempotency:** +- Delete operations are idempotent +- Deleting a non-existent document returns success (false) but doesn't fail +- Safe to retry delete operations + +--- + +## Public Methods + +### delete + +Deletes a document from an Elasticsearch index by ID. + +**Signature:** + +```scala +def delete(id: String, index: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `id` - The document ID to delete +- `index` - The index name containing the document + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if document was deleted +- `ElasticSuccess[Boolean]` with `false` if document doesn't exist +- `ElasticFailure` with error details (400 for validation errors) + +**Validation:** +- Index name format validation + +**Behavior:** +- Automatically refreshes index after successful deletion +- Logs success/failure with appropriate emoji indicators +- Returns success even if document doesn't exist (idempotent) + +**Examples:** + +```scala +// Basic document deletion +client.delete("prod-001", "products") match { + case ElasticSuccess(true) => println("✅ Document deleted") + case ElasticSuccess(false) => println("⚠️ Document not found") + case ElasticFailure(e) => println(s"❌ Error: ${e.message}") +} + +// Delete with existence check +def deleteIfExists(id: String, index: String): ElasticResult[Boolean] = { + for { + exists <- client.exists(id, index) + result <- if (exists) { + client.delete(id, index) + } else { + ElasticResult.success(false) + } + } yield result +} + +// Delete multiple documents +val idsToDelete = List("prod-001", "prod-002", "prod-003") + +idsToDelete.foreach { id => + client.delete(id, "products") match { + case ElasticSuccess(true) => println(s"✅ Deleted: $id") + case ElasticSuccess(false) => println(s"⚠️ Not found: $id") + case ElasticFailure(e) => println(s"❌ Failed: $id - ${e.message}") + } +} + +// Batch deletion with result tracking +def deleteMany(ids: List[String], index: String): (Int, Int, Int) = { + val results = ids.map(id => client.delete(id, index)) + + val deleted = results.count { + case ElasticSuccess(true) => true + case _ => false + } + + val notFound = results.count { + case ElasticSuccess(false) => true + case _ => false + } + + val failed = results.count { + case ElasticFailure(_) => true + case _ => false + } + + (deleted, notFound, failed) +} + +val (deleted, notFound, failed) = deleteMany(idsToDelete, "products") +println(s"Deleted: $deleted, Not found: $notFound, Failed: $failed") + +// Conditional deletion +def deleteOldDocuments(index: String, cutoffDate: String): ElasticResult[List[Boolean]] = { + for { + oldDocs <- client.searchByDateRange(index, "created_at", None, Some(cutoffDate)) + results <- ElasticResult.sequence( + oldDocs.map(doc => client.delete(doc.id, index)) + ) + } yield results +} + +// Delete with retry logic +def deleteWithRetry( + id: String, + index: String, + maxRetries: Int = 3 +): ElasticResult[Boolean] = { + def attempt(remaining: 
Int): ElasticResult[Boolean] = {
+    client.delete(id, index) match {
+      case success @ ElasticSuccess(_) => success
+      case failure if remaining > 0 =>
+        Thread.sleep(1000)
+        attempt(remaining - 1)
+      case failure => failure
+    }
+  }
+  attempt(maxRetries)
+}
+
+// Safe deletion with validation
+def safeDelete(id: String, index: String): ElasticResult[Boolean] = {
+  for {
+    exists <- client.exists(id, index)
+    _ <- if (!exists) {
+      ElasticResult.failure(s"Document $id does not exist in index $index")
+    } else {
+      ElasticResult.success(())
+    }
+    deleted <- client.delete(id, index)
+  } yield deleted
+}
+
+// Delete with audit trail
+def deleteWithAudit(
+  id: String,
+  index: String,
+  userId: String
+): ElasticResult[Boolean] = {
+  for {
+    deleted <- client.delete(id, index)
+    _ <- if (deleted) {
+      // logAuditEvent is assumed to return an ElasticResult[Unit]
+      logAuditEvent(s"User $userId deleted document $id from $index")
+    } else {
+      ElasticResult.success(())
+    }
+  } yield deleted
+}
+
+// Error handling
+client.delete("prod-001", "INVALID INDEX") match {
+  case ElasticFailure(error) =>
+    assert(error.statusCode.contains(400))
+    assert(error.message.contains("Invalid index"))
+}
+
+// Monadic composition
+def archiveAndDelete(id: String, index: String): ElasticResult[Boolean] = {
+  for {
+    document <- client.get(id, index)
+    source <- document match {
+      case Some(json) => ElasticResult.success(json)
+      case None => ElasticResult.failure(s"Document $id not found in $index")
+    }
+    _ <- client.index("archive", id, source)
+    deleted <- client.delete(id, index)
+  } yield deleted
+}
+```
+
+---
+
+### deleteAsync
+
+Asynchronously deletes a document from an Elasticsearch index.
+
+**Signature:**
+
+```scala
+def deleteAsync(
+  id: String,
+  index: String
+)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]]
+```
+
+**Parameters:**
+- `id` - The document ID to delete
+- `index` - The index name containing the document
+- `ec` - Implicit ExecutionContext for async execution
+
+**Returns:**
+- `Future[ElasticResult[Boolean]]` that completes when deletion finishes
+- `true` if document was deleted, `false` if not found
+
+**Validation:**
+- Index name validation performed synchronously before async execution
+
+**Examples:**
+
+```scala
+import scala.concurrent.ExecutionContext.Implicits.global
+import scala.concurrent.Future
+import scala.util.{Failure, Success}
+
+// Basic async deletion
+client.deleteAsync("prod-001", "products").onComplete {
+  case Success(ElasticSuccess(true)) =>
+    println("✅ Document deleted")
+  case Success(ElasticSuccess(false)) =>
+    println("⚠️ Document not found")
+  case Success(ElasticFailure(error)) =>
+    println(s"❌ Error: ${error.message}")
+  case Failure(ex) =>
+    println(s"❌ Future failed: ${ex.getMessage}")
+}
+
+// Batch async deletion
+val idsToDelete = List("prod-001", "prod-002", "prod-003")
+
+val deleteFutures = idsToDelete.map { id =>
+  client.deleteAsync(id, "products")
+}
+
+Future.sequence(deleteFutures).map { results =>
+  val deletedCount = results.count {
+    case ElasticSuccess(true) => true
+    case _ => false
+  }
+  val notFoundCount = results.count {
+    case ElasticSuccess(false) => true
+    case _ => false
+  }
+  println(s"Deleted: $deletedCount, Not found: $notFoundCount")
+}
+
+// Chained async operations
+def fetchAndDelete(id: String, index: String): Future[ElasticResult[Boolean]] = {
+  client.existsAsync(id, index).flatMap {
+    case ElasticSuccess(true) =>
+      client.deleteAsync(id, index)
+    case ElasticSuccess(false) =>
+      Future.successful(ElasticResult.success(false))
+    case failure @ ElasticFailure(_) =>
+      Future.successful(failure)
+  }
+}
+
+// Parallel deletion with rate limiting
+def deleteWithRateLimit(
+  ids: List[String],
+  index: String,
+  maxConcurrent: Int = 10
+): Future[List[ElasticResult[Boolean]]] = {
ids.grouped(maxConcurrent).foldLeft(Future.successful(List.empty[ElasticResult[Boolean]])) { + case (accFuture, batch) => + accFuture.flatMap { acc => + val batchFutures = batch.map { id => + client.deleteAsync(id, index) + } + Future.sequence(batchFutures).map(acc ++ _) + } + } +} + +// Archive before delete +def archiveAndDeleteAsync( + id: String, + sourceIndex: String, + archiveIndex: String +): Future[ElasticResult[Boolean]] = { + for { + doc <- client.getAsync(id, sourceIndex) + _ <- client.indexAsync(archiveIndex, id, doc) + deleted <- client.deleteAsync(id, sourceIndex) + } yield deleted +} + +// Error recovery +client.deleteAsync("prod-001", "products") + .recover { + case ex: Exception => + ElasticFailure(ElasticError( + message = s"Delete failed: ${ex.getMessage}", + cause = Some(ex) + )) + } + .foreach { + case ElasticSuccess(true) => println("Success") + case ElasticSuccess(false) => println("Not found") + case ElasticFailure(e) => println(s"Failed: ${e.message}") + } + +// Conditional async deletion +def deleteIfConditionAsync( + id: String, + index: String, + condition: String => Future[Boolean] +): Future[ElasticResult[Boolean]] = { + condition(id).flatMap { shouldDelete => + if (shouldDelete) { + client.deleteAsync(id, index) + } else { + Future.successful(ElasticResult.success(false)) + } + } +} + +// Streaming deletion +import akka.stream.scaladsl._ + +def streamDelete( + index: String, + source: Source[String, _] +): Future[Int] = { + source + .mapAsync(parallelism = 10) { id => + client.deleteAsync(id, index) + } + .runFold(0) { (count, result) => + result match { + case ElasticSuccess(true) => count + 1 + case _ => count + } + } +} + +// Retry with exponential backoff +def deleteWithRetryAsync( + id: String, + index: String, + maxRetries: Int = 3 +): Future[ElasticResult[Boolean]] = { + + def attempt(remaining: Int, delay: Long = 1000): Future[ElasticResult[Boolean]] = { + client.deleteAsync(id, index).flatMap { + case success @ ElasticSuccess(_) => Future.successful(success) + case failure if remaining > 0 => + Future { + Thread.sleep(delay) + }.flatMap(_ => attempt(remaining - 1, delay * 2)) + case failure => Future.successful(failure) + } + } + + attempt(maxRetries) +} + +// Collect deletion results with errors +def deleteAllWithErrors( + ids: List[String], + index: String +): Future[(List[String], List[(String, ElasticError)])] = { + + val futures = ids.map { id => + client.deleteAsync(id, index).map(result => (id, result)) + } + + Future.sequence(futures).map { results => + val (successes, failures) = results.partition { + case (_, ElasticSuccess(true)) => true + case _ => false + } + + val deletedIds = successes.map(_._1) + val failureDetails = failures.collect { + case (id, ElasticFailure(error)) => (id, error) + } + + (deletedIds, failureDetails) + } +} + +// Await result (for testing) +import scala.concurrent.Await +import scala.concurrent.duration._ + +val result = Await.result( + client.deleteAsync("prod-001", "products"), + 5.seconds +) +``` + +--- + +## Implementation Requirements + +### executeDelete + +```scala +private[client] def executeDelete( + index: String, + id: String +): ElasticResult[Boolean] +``` + +**Implementation Example:** + +```scala +private[client] def executeDelete( + index: String, + id: String +): ElasticResult[Boolean] = { + executeRestAction[DeleteResponse, Boolean]( + operation = "delete", + index = Some(index) + )( + action = { + val request = new DeleteRequest(index, id) + client.delete(request, RequestOptions.DEFAULT) + } + )( 
+ transformer = resp => { + resp.getResult == DocWriteResponse.Result.DELETED + } + ) +} +``` + +--- + +### executeDeleteAsync + +```scala +private[client] def executeDeleteAsync( + index: String, + id: String +)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] +``` + +**Implementation Example:** + +```scala +private[client] def executeDeleteAsync( + index: String, + id: String +)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = { + val promise = Promise[ElasticResult[Boolean]]() + + val request = new DeleteRequest(index, id) + + client.deleteAsync( + request, + RequestOptions.DEFAULT, + new ActionListener[DeleteResponse] { + override def onResponse(response: DeleteResponse): Unit = { + val deleted = response.getResult == DocWriteResponse.Result.DELETED + promise.success(ElasticSuccess(deleted)) + } + + override def onFailure(e: Exception): Unit = { + promise.success(ElasticFailure(ElasticError( + message = s"Async delete failed: ${e.getMessage}", + operation = Some("deleteAsync"), + index = Some(index), + cause = Some(e) + ))) + } + } + ) + + promise.future +} +``` + +--- + +## Common Patterns + +### Repository Pattern with Delete + +```scala +trait Repository[T <: AnyRef] { + def delete(id: String)(implicit + ct: ClassTag[T], + client: ElasticClient + ): ElasticResult[Boolean] = { + val indexName = ct.runtimeClass.getSimpleName.toLowerCase + client.delete(id, indexName) + } + + def deleteAsync(id: String)(implicit + ct: ClassTag[T], + ec: ExecutionContext, + client: ElasticClient + ): Future[ElasticResult[Boolean]] = { + val indexName = ct.runtimeClass.getSimpleName.toLowerCase + client.deleteAsync(id, indexName) + } +} + +case class Product(name: String, price: Double) + +object ProductRepository extends Repository[Product] { + def deleteProduct(id: String)(implicit + client: ElasticClient + ): ElasticResult[Boolean] = { + delete(id) + } +} +``` + +### Soft Delete Pattern + +```scala +case class SoftDeletable(deleted: Boolean, deletedAt: Option[String]) + +def softDelete(id: String, index: String): ElasticResult[Boolean] = { + val update = SoftDeletable( + deleted = true, + deletedAt = Some(java.time.Instant.now().toString) + ) + client.updateAs(update, id, Some(index), upsert = false) +} + +def hardDelete(id: String, index: String): ElasticResult[Boolean] = { + client.delete(id, index) +} + +// Query only non-deleted documents +def searchActive(index: String, query: String): ElasticResult[List[Document]] = { + val searchQuery = s""" + { + "query": { + "bool": { + "must": [ + {"match": {"_all": "$query"}}, + {"term": {"deleted": false}} + ] + } + } + } + """ + client.search(index, searchQuery) +} +``` + +### Archive Before Delete + +```scala +def archiveAndDelete( + id: String, + sourceIndex: String, + archiveIndex: String +): ElasticResult[Boolean] = { + for { + // Get document + document <- client.get(id, sourceIndex) + + // Archive it + _ <- client.index(archiveIndex, id, document) + + // Delete from source + deleted <- client.delete(id, sourceIndex) + } yield deleted +} + +// Async version +def archiveAndDeleteAsync( + id: String, + sourceIndex: String, + archiveIndex: String +)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = { + for { + document <- client.getAsync(id, sourceIndex) + _ <- client.indexAsync(archiveIndex, id, document) + deleted <- client.deleteAsync(id, sourceIndex) + } yield deleted +} +``` + +### Bulk Delete by Query + +```scala +def deleteByQuery( + index: String, + query: String +): ElasticResult[Int] = { + for { + 
// Search for matching documents
+    results <- client.search(index, query)
+    ids = results.map(_.id)
+
+    // Delete each document
+    deleteResults <- ElasticResult.sequence(
+      ids.map(id => client.delete(id, index))
+    )
+
+    // Count successful deletions
+    deletedCount = deleteResults.count(_ == true)
+  } yield deletedCount
+}
+
+// Example: Delete all products with price > 1000
+val expensiveProductsQuery = """
+{
+  "query": {
+    "range": {
+      "price": {
+        "gt": 1000
+      }
+    }
+  }
+}
+"""
+deleteByQuery("products", expensiveProductsQuery)
+```
+
+### Cascading Delete
+
+```scala
+def cascadeDelete(
+  parentId: String,
+  parentIndex: String,
+  childIndex: String,
+  parentField: String
+): ElasticResult[Int] = {
+  // Find all child documents (the query is built before the
+  // for-comprehension, whose first clause must be a generator)
+  val childQuery = s"""
+    {
+      "query": {
+        "term": {
+          "$parentField": "$parentId"
+        }
+      }
+    }
+  """
+  for {
+    children <- client.search(childIndex, childQuery)
+
+    // Delete all children
+    childResults <- ElasticResult.sequence(
+      children.map(child => client.delete(child.id, childIndex))
+    )
+
+    // Delete parent
+    _ <- client.delete(parentId, parentIndex)
+
+    // Count total deletions
+    totalDeleted = childResults.count(_ == true) + 1
+  } yield totalDeleted
+}
+
+// Example: Delete order and all its items
+cascadeDelete("order-123", "orders", "order-items", "order_id")
+```
+
+---
+
+## Performance Optimization
+
+### Batch Delete with Disabled Refresh
+
+```scala
+def batchDeleteOptimized(
+  ids: List[String],
+  index: String
+): ElasticResult[Int] = {
+  for {
+    // Disable refresh
+    _ <- client.toggleRefresh(index, enable = false)
+
+    // Delete all documents
+    results <- ElasticResult.sequence(
+      ids.map(id => client.delete(id, index))
+    )
+
+    // Re-enable refresh
+    _ <- client.toggleRefresh(index, enable = true)
+
+    // Manual refresh
+    _ <- client.refresh(index)
+
+    // Count deletions
+    deletedCount = results.count(_ == true)
+  } yield deletedCount
+}
+```
+
+### Parallel Async Delete
+
+```scala
+def parallelDelete(
+  ids: List[String],
+  index: String,
+  parallelism: Int = 10
+)(implicit ec: ExecutionContext): Future[Int] = {
+
+  ids
+    .grouped(parallelism)
+    .foldLeft(Future.successful(0)) { case (accFuture, batch) =>
+      accFuture.flatMap { acc =>
+        val batchFutures = batch.map { id =>
+          client.deleteAsync(id, index)
+        }
+
+        Future.sequence(batchFutures).map { results =>
+          val batchCount = results.count {
+            case ElasticSuccess(true) => true
+            case _ => false
+          }
+          acc + batchCount
+        }
+      }
+    }
+}
+```
+
+---
+
+## Error Handling
+
+**Invalid Index Name:**
+
+```scala
+client.delete("doc-001", "INVALID INDEX") match {
+  case ElasticFailure(error) =>
+    assert(error.statusCode.contains(400))
+    assert(error.message.contains("Invalid index"))
+    assert(error.operation.contains("delete"))
+}
+```
+
+**Document Not Found (Not an Error):**
+
+```scala
+client.delete("non-existent-id", "products") match {
+  case ElasticSuccess(false) =>
+    println("⚠️ Document not found, but operation succeeded")
+  case ElasticSuccess(true) =>
+    println("✅ Document deleted")
+  case ElasticFailure(error) =>
+    println(s"❌ Actual error: ${error.message}")
+}
+```
+
+**Async Failure Handling:**
+
+```scala
+client.deleteAsync("doc-001", "products")
+  .recover {
+    case ex: Exception =>
+      ElasticFailure(ElasticError(
+        message = s"Async delete failed: ${ex.getMessage}",
+        operation = Some("deleteAsync"),
+        cause = Some(ex)
+      ))
+  }
+  .foreach {
+    case ElasticSuccess(true) => println("Deleted")
+    case ElasticSuccess(false) => println("Not found")
+    case ElasticFailure(e) => println(s"Error: 
${e.message}") + } +``` + +--- + +## Best Practices + +**1. Check Existence Before Delete (Optional)** + +```scala +// ✅ Direct delete (idempotent) +client.delete("prod-001", "products") + +// ✅ Check existence first (if you need to know) +for { + exists <- client.exists("prod-001", "products") + deleted <- if (exists) { + client.delete("prod-001", "products") + } else { + ElasticResult.success(false) + } +} yield deleted +``` + +**2. Use Async for Batch Operations** + +```scala +// ✅ Good - non-blocking batch delete +val futures = ids.map(id => client.deleteAsync(id, "products")) +Future.sequence(futures) + +// ❌ Avoid - blocking batch delete +ids.foreach(id => client.delete(id, "products")) +``` + +**3. Archive Important Data Before Deletion** + +```scala +// ✅ Good - archive before delete +def safeDelete(id: String, index: String): ElasticResult[Boolean] = { + for { + doc <- client.get(id, index) + _ <- client.index("archive", id, doc) + deleted <- client.delete(id, index) + } yield deleted +} + +// ❌ Risky - direct delete without backup +client.delete(id, index) +``` + +**4. Handle Deletion Results Appropriately** + +```scala +// ✅ Good - distinguish between deleted and not found +client.delete("prod-001", "products") match { + case ElasticSuccess(true) => + println("Document was deleted") + case ElasticSuccess(false) => + println("Document didn't exist") + case ElasticFailure(e) => + println(s"Error occurred: ${e.message}") +} + +// ❌ Avoid - treating not found as error +client.delete("prod-001", "products") match { + case ElasticSuccess(true) => println("Success") + case _ => println("Failed") // Too broad +} +``` + +**5. Use Soft Delete for Recoverable Data** + +```scala +// ✅ Good - soft delete for user data +def softDeleteUser(id: String): ElasticResult[Boolean] = { + val update = """{"deleted": true, "deleted_at": "2024-01-15"}""" + client.update("users", id, update, upsert = false) +} + +// ✅ Good - hard delete for temporary data +def hardDeleteSession(id: String): ElasticResult[Boolean] = { + client.delete(id, "sessions") +} +``` + +--- + +## Comparison with Related Operations + +### Delete vs Update (Soft Delete) + +| Operation | Data Retained | Recoverable | Performance | Use Case | +|-----------|---------------|-------------|-------------|----------| +| **Hard Delete** | No | No | Fast | Temporary data, logs | +| **Soft Delete** | Yes | Yes | Slower | User data, orders | + +```scala +// Hard delete +client.delete("doc-001", "products") + +// Soft delete +client.updateAs(SoftDelete(deleted = true), "doc-001", Some("products")) +``` + +--- + +## Testing Scenarios + +### Test Delete Functionality + +```scala +def testDelete(): Unit = { + val testIndex = "test-delete-index" + val testId = "test-doc-001" + + for { + // Create document + _ <- client.createIndex(testIndex) + _ <- client.index(testIndex, testId, """{"name": "test"}""") + + // Verify exists + exists1 <- client.exists(testId, testIndex) + _ = assert(exists1, "Document should exist before delete") + + // Delete + deleted <- client.delete(testId, testIndex) + _ = assert(deleted, "Delete should return true") + + // Verify deleted + exists2 <- client.exists(testId, testIndex) + _ = assert(!exists2, "Document should not exist after delete") + + // Delete again (idempotent) + deleted2 <- client.delete(testId, testIndex) + _ = assert(!deleted2, "Second delete should return false") + + // Cleanup + _ <- client.deleteIndex(testIndex) + } yield () +} +``` + +### Test Async Delete + +```scala +def testDeleteAsync()(implicit ec: 
ExecutionContext): Future[Unit] = { + val testIndex = "test-async-delete" + val testId = "test-doc-001" + + for { + _ <- client.createIndexAsync(testIndex) + _ <- client.indexAsync(testIndex, testId, """{"name": "test"}""") + + result <- client.deleteAsync(testId, testIndex) + _ = result match { + case ElasticSuccess(true) => println("✅ Async delete successful") + case _ => throw new Exception("Async delete failed") + } + + _ <- client.deleteIndexAsync(testIndex) + } yield () +} +``` + +--- + +[Back to index](README.md) | [Next: Bulk Operations](bulk.md) \ No newline at end of file diff --git a/documentation/client/flush.md b/documentation/client/flush.md new file mode 100644 index 00000000..55bd285f --- /dev/null +++ b/documentation/client/flush.md @@ -0,0 +1,565 @@ +[Back to index](README.md) + +# FLUSH API + +## Overview + +The **FlushApi** trait provides functionality to flush Elasticsearch indices, ensuring all in-memory operations are written to disk and creating a new Lucene commit point. + +**Features:** +- Force flush operations to disk +- Wait for flush completion +- Index name validation +- Comprehensive error handling and logging + +**Use Cases:** +- Ensure data durability before backup/snapshot +- Prepare for cluster maintenance +- Guarantee data persistence after critical operations +- Testing and development scenarios + +--- + +## Understanding Flush + +**What is Flush?** +- Writes all in-memory index data (translog) to disk +- Creates a new Lucene commit point +- Clears the translog after successful write +- Makes data recoverable after unexpected shutdown + +**Flush vs Refresh:** +- **Refresh:** Makes recent changes searchable (in-memory operation) +- **Flush:** Persists data to disk (I/O operation) + +**When to Use Flush:** +- Before taking snapshots +- Before cluster shutdown/restart +- After critical bulk operations +- When maximum durability is required + +--- + +## Public Methods + +### flush + +Flushes an index to ensure all operations are written to disk. 
+
+**Signature:**
+
+```scala
+def flush(
+  index: String,
+  force: Boolean = true,
+  wait: Boolean = true
+): ElasticResult[Boolean]
+```
+
+**Parameters:**
+- `index` - The index name to flush
+- `force` - Whether to force the flush even if not necessary (default: `true`)
+- `wait` - Whether to wait for flush completion (default: `true`)
+
+**Returns:**
+- `ElasticSuccess[Boolean]` with `true` if flushed successfully
+- `ElasticFailure` with error details (400 for validation errors)
+
+**Validation:**
+- Index name format validation
+
+**Behavior:**
+- **force = true:** Always performs the flush, regardless of whether changes exist
+- **force = false:** Only flushes if there are uncommitted changes
+- **wait = true:** Blocks until the flush completes
+- **wait = false:** Returns immediately (asynchronous flush)
+
+**Examples:**
+
+```scala
+// Basic flush
+client.flush("products") match {
+  case ElasticSuccess(true) => println("Index flushed")
+  case ElasticSuccess(false) => println("Flush not needed")
+  case ElasticFailure(e) => println(s"Error: ${e.message}")
+}
+
+// Force flush with wait
+client.flush("critical-data", force = true, wait = true)
+
+// Opportunistic flush (only if needed)
+client.flush("logs", force = false, wait = true)
+
+// Asynchronous flush
+client.flush("temp-index", force = true, wait = false)
+
+// Flush after bulk operations
+for {
+  _ <- client.bulkIndex(largeDataset)
+  _ <- client.refresh("products")
+  _ <- client.flush("products")
+} yield "Data persisted to disk"
+
+// Pre-backup flush
+def prepareForBackup(index: String): ElasticResult[Unit] = {
+  for {
+    _ <- client.flush(index, force = true, wait = true)
+    _ = println(s"✅ Index $index ready for backup")
+  } yield ()
+}
+
+// Flush multiple indices
+def flushAll(indices: List[String]): ElasticResult[List[Boolean]] = {
+  ElasticResult.sequence(
+    indices.map(index => client.flush(index))
+  )
+}
+
+val indices = List("users", "products", "orders")
+flushAll(indices) match {
+  case ElasticSuccess(results) =>
+    results.zip(indices).foreach { case (success, index) =>
+      if (success) println(s"✅ $index flushed")
+      else println(s"⚠️ $index not flushed")
+    }
+  case ElasticFailure(e) =>
+    println(s"Flush failed: ${e.message}")
+}
+
+// Conditional flush
+def flushIfNeeded(index: String, threshold: Long): ElasticResult[Boolean] = {
+  // Check translog size (implementation depends on the stats API;
+  // getTranslogSize is a hypothetical helper)
+  getTranslogSize(index).flatMap { size =>
+    if (size > threshold) {
+      client.flush(index, force = true, wait = true)
+    } else {
+      ElasticResult.success(false)
+    }
+  }
+}
+
+// Error handling
+client.flush("invalid index name") match {
+  case ElasticFailure(error) =>
+    assert(error.statusCode.contains(400))
+    assert(error.message.contains("Invalid index"))
+}
+```
+
+---
+
+## Implementation Requirements
+
+### executeFlush
+
+```scala
+private[client] def executeFlush(
+  index: String,
+  force: Boolean,
+  wait: Boolean
+): ElasticResult[Boolean]
+```
+
+**Implementation Example:**
+
+```scala
+private[client] def executeFlush(
+  index: String,
+  force: Boolean,
+  wait: Boolean
+): ElasticResult[Boolean] = {
+  executeRestAction[FlushRequest, FlushResponse, Boolean](
+    operation = "flush",
+    index = Some(index)
+  )(
+    request = new FlushRequest(index)
+      .force(force)
+      .waitIfOngoing(wait)
+  )(
+    executor = req => client.indices().flush(req, RequestOptions.DEFAULT)
+  )(
+    // A flush is considered successful when no shard failed
+    transformer = resp => resp.getFailedShards == 0
+  )
+}
+```
+
+---
+
+## Common Workflows
+
+### Pre-Backup Flush
+
+```scala
+def prepareIndicesForBackup(indices: 
List[String]): ElasticResult[Unit] = { + for { + // Flush all indices + _ <- indices.foldLeft(ElasticResult.success(())) { (acc, index) => + acc.flatMap(_ => client.flush(index, force = true, wait = true).map(_ => ())) + } + _ = println("✅ All indices flushed and ready for backup") + } yield () +} + +// Usage +prepareIndicesForBackup(List("users", "products", "orders")) +``` + +### Bulk Operation with Flush + +```scala +def bulkIndexWithPersistence[T]( + index: String, + documents: Seq[T] +): ElasticResult[Unit] = { + for { + // Disable refresh for performance + _ <- client.toggleRefresh(index, enable = false) + + // Bulk index + _ <- client.bulkIndex(documents) + + // Re-enable refresh + _ <- client.toggleRefresh(index, enable = true) + + // Make searchable + _ <- client.refresh(index) + + // Persist to disk + _ <- client.flush(index, force = true, wait = true) + } yield () +} +``` + +### Scheduled Maintenance Flush + +```scala +import java.util.concurrent.{Executors, TimeUnit} + +def schedulePeriodicFlush( + index: String, + intervalMinutes: Int +): Unit = { + val scheduler = Executors.newScheduledThreadPool(1) + + scheduler.scheduleAtFixedRate( + () => { + client.flush(index, force = false, wait = true) match { + case ElasticSuccess(true) => + println(s"✅ Periodic flush completed for $index") + case ElasticSuccess(false) => + println(s"⚠️ No flush needed for $index") + case ElasticFailure(e) => + println(s"❌ Periodic flush failed for $index: ${e.message}") + } + }, + intervalMinutes, + intervalMinutes, + TimeUnit.MINUTES + ) +} + +// Flush every 30 minutes +schedulePeriodicFlush("logs", 30) +``` + +### Critical Operation Pattern + +```scala +def performCriticalOperation[T]( + index: String, + operation: => ElasticResult[T] +): ElasticResult[T] = { + for { + // Perform operation + result <- operation + + // Ensure persistence + _ <- client.flush(index, force = true, wait = true) + + // Verify + _ = println(s"✅ Critical operation completed and persisted for $index") + } yield result +} + +// Usage +performCriticalOperation("financial-transactions", { + client.index("financial-transactions", criticalTransaction) +}) +``` + +### Cluster Shutdown Preparation + +```scala +def prepareForShutdown(indices: List[String]): ElasticResult[Unit] = { + for { + // Flush all indices + flushResults <- ElasticResult.sequence( + indices.map(index => client.flush(index, force = true, wait = true)) + ) + + // Verify all succeeded + _ <- if (flushResults.forall(_ == true)) { + println("✅ All indices flushed successfully") + ElasticResult.success(()) + } else { + ElasticResult.failure("Some indices failed to flush") + } + } yield () +} +``` + +--- + +## Performance Considerations + +### Flush Impact + +**I/O Impact:** +- Flush is an I/O-intensive operation +- Can temporarily impact cluster performance +- Avoid frequent flushes in high-throughput scenarios + +**Best Practices:** + +```scala +// ❌ Bad - too frequent +documents.foreach { doc => + client.index("my-index", doc) + client.flush("my-index") // Very expensive! 
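+  // each flush is a full Lucene commit plus an fsync, so flushing per document bottlenecks every write on disk I/O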
+} + +// ✅ Good - batch and flush once +documents.foreach { doc => + client.index("my-index", doc) +} +client.flush("my-index") + +// ✅ Better - use bulk operations +client.bulkIndex(documents) +client.flush("my-index") +``` + +### Translog Configuration + +```scala +// Configure translog durability in index settings +val durableSettings = """ +{ + "index": { + "translog.durability": "request", + "translog.sync_interval": "5s" + } +} +""" + +// Less durable but faster +val asyncSettings = """ +{ + "index": { + "translog.durability": "async", + "translog.sync_interval": "30s" + } +} +""" +``` + +### Selective Flushing + +```scala +def flushCriticalIndicesOnly( + allIndices: List[String], + criticalIndices: Set[String] +): ElasticResult[Unit] = { + val indicesToFlush = allIndices.filter(criticalIndices.contains) + + for { + _ <- indicesToFlush.foldLeft(ElasticResult.success(())) { (acc, index) => + acc.flatMap(_ => client.flush(index, force = true, wait = true).map(_ => ())) + } + } yield () +} + +val critical = Set("financial-transactions", "user-accounts") +flushCriticalIndicesOnly(allIndices, critical) +``` + +--- + +## Error Handling + +**Invalid Index Name:** + +```scala +client.flush("INVALID INDEX") match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.operation.contains("refresh")) // Note: logs as "refresh" +} +``` + +**Index Not Found:** + +```scala +client.flush("non-existent-index") match { + case ElasticFailure(error) => + println(s"Flush failed: ${error.message}") +} +``` + +**Partial Failure:** + +```scala +client.flush("my-index") match { + case ElasticSuccess(false) => + println("⚠️ Flush completed but some shards failed") + case ElasticSuccess(true) => + println("✅ All shards flushed successfully") + case ElasticFailure(error) => + println(s"❌ Flush failed: ${error.message}") +} +``` + +--- + +## Comparison with Related Operations + +### Flush vs Refresh vs Fsync + +| Operation | Purpose | Scope | Cost | +|-----------|---------|-------|------| +| **Refresh** | Make changes searchable | In-memory | Low | +| **Flush** | Persist to disk | Disk I/O | Medium | +| **Fsync** | OS-level sync | Disk I/O | High | + +### When to Use Each + +```scala +// After bulk indexing - make searchable +client.bulkIndex(documents) +client.refresh("my-index") + +// Before backup - ensure durability +client.flush("my-index", force = true, wait = true) + +// Critical transaction - maximum durability +client.index("financial-transactions", transaction) +client.flush("financial-transactions", force = true, wait = true) +``` + +--- + +## Best Practices + +**1. Flush After Bulk Operations** + +```scala +def safeBulkIndex[T](index: String, docs: Seq[T]): ElasticResult[Unit] = { + for { + _ <- client.bulkIndex(docs) + _ <- client.refresh(index) + _ <- client.flush(index) + } yield () +} +``` + +**2. Use force = false for Regular Maintenance** + +```scala +// Only flush if there are uncommitted changes +def maintenanceFlush(index: String): ElasticResult[Boolean] = { + client.flush(index, force = false, wait = true) +} +``` + +**3. Always Wait for Critical Operations** + +```scala +// For critical data, always wait +def persistCriticalData[T](index: String, data: T): ElasticResult[Unit] = { + for { + _ <- client.index(index, data) + _ <- client.flush(index, force = true, wait = true) + } yield () +} +``` + +**4. 
Batch Flushes for Multiple Indices** + +```scala +def batchFlush(indices: List[String]): ElasticResult[Unit] = { + indices.foldLeft(ElasticResult.success(())) { (acc, index) => + acc.flatMap(_ => + client.flush(index, force = false, wait = true).map(_ => ()) + ) + } +} +``` + +**5. Monitor Flush Performance** + +```scala +def timedFlush(index: String): ElasticResult[(Boolean, Long)] = { + val start = System.currentTimeMillis() + client.flush(index).map { result => + val duration = System.currentTimeMillis() - start + println(s"Flush took ${duration}ms") + (result, duration) + } +} +``` + +--- + +## Testing Scenarios + +### Test Flush Functionality + +```scala +def testFlush(): Unit = { + val testIndex = "test-flush-index" + + for { + // Create and populate + _ <- client.createIndex(testIndex) + _ <- client.index(testIndex, testDocument) + + // Flush + flushed <- client.flush(testIndex, force = true, wait = true) + _ = assert(flushed, "Flush should succeed") + + // Cleanup + _ <- client.deleteIndex(testIndex) + } yield () +} +``` + +### Test Durability + +```scala +def testDurability(): Unit = { + val testIndex = "test-durability" + + for { + _ <- client.createIndex(testIndex) + _ <- client.index(testIndex, testDoc) + _ <- client.flush(testIndex, force = true, wait = true) + + // Simulate restart by closing and reopening + _ <- client.closeIndex(testIndex) + _ <- client.openIndex(testIndex) + + // Verify data still exists + result <- client.search(testIndex, matchAllQuery) + _ = assert(result.nonEmpty, "Data should persist after flush") + + _ <- client.deleteIndex(testIndex) + } yield () +} +``` + +--- + +[Back to index](README.md) | [Next: Refresh Index](refresh.md) \ No newline at end of file diff --git a/documentation/client/get.md b/documentation/client/get.md new file mode 100644 index 00000000..e22acac7 --- /dev/null +++ b/documentation/client/get.md @@ -0,0 +1,1915 @@ +[Back to index](README.md) + +# GET API + +## Overview + +The **Get API** provides methods to retrieve documents from Elasticsearch by their document ID. It supports both synchronous and asynchronous operations, with automatic type conversion and comprehensive error handling. + +**Key Features:** +- **Document retrieval by ID** (synchronous and asynchronous) +- **Type-safe deserialization** with automatic conversion +- **Document existence checking** without retrieving full content +- **Automatic index name inference** from entity types +- **Comprehensive error handling** with detailed error messages +- **Input validation** for index names and document IDs +- **Logging integration** for debugging and monitoring + +**Dependencies:** +- Requires `SerializationApi` for JSON conversion +- Requires `ElasticClientHelpers` for validation utilities + +--- + +## Table of Contents + +1. [Core Concepts](#core-concepts) +2. [Basic Usage](#basic-usage) +3. [Typed Retrieval](#typed-retrieval) +4. [Asynchronous Operations](#asynchronous-operations) +5. [Document Existence Check](#document-existence-check) +6. [Error Handling](#error-handling) +7. [Advanced Patterns](#advanced-patterns) +8. [Testing](#testing) +9. 
[Best Practices](#best-practices) + +--- + +## Core Concepts + +### API Methods Overview + +The Get API provides four main methods: + +```scala +trait GetApi { + // Check if document exists + def exists(id: String, index: String): ElasticResult[Boolean] + + // Get document as JSON string + def get(id: String, index: String): ElasticResult[Option[String]] + + // Get document as typed entity + def getAs[U <: AnyRef]( + id: String, + index: Option[String] = None, + maybeType: Option[String] = None + )(implicit m: Manifest[U], formats: Formats): ElasticResult[Option[U]] + + // Async: Get document as JSON string + def getAsync( + id: String, + index: String + )(implicit ec: ExecutionContext): Future[ElasticResult[Option[String]]] + + // Async: Get document as typed entity + def getAsyncAs[U <: AnyRef]( + id: String, + index: Option[String] = None, + maybeType: Option[String] = None + )(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats + ): Future[ElasticResult[Option[U]]] +} +``` + +--- + +### Method Comparison + +| Method | Return Type | Async | Type-Safe | Use Case | +|--------------|-----------------------------------------|--------|------------|--------------------------| +| `exists` | `ElasticResult[Boolean]` | ❌ | N/A | Check document existence | +| `get` | `ElasticResult[Option[String]]` | ❌ | ❌ | Get raw JSON | +| `getAs` | `ElasticResult[Option[U]]` | ❌ | ✅ | Get typed entity | +| `getAsync` | `Future[ElasticResult[Option[String]]]` | ✅ | ❌ | Get raw JSON (async) | +| `getAsyncAs` | `Future[ElasticResult[Option[U]]]` | ✅ | ✅ | Get typed entity (async) | + +--- + +### Return Types + +All methods return `ElasticResult` or `Future[ElasticResult]`: + +```scala +// Synchronous result +sealed trait ElasticResult[+T] { + def map[U](f: T => U): ElasticResult[U] + def flatMap[U](f: T => ElasticResult[U]): ElasticResult[U] +} + +case class ElasticSuccess[T](value: T) extends ElasticResult[T] +case class ElasticFailure(error: ElasticError) extends ElasticResult[Nothing] + +// Error details +case class ElasticError( + message: String, + statusCode: Option[Int] = None, + index: Option[String] = None, + operation: Option[String] = None, + cause: Option[Throwable] = None +) +``` + +--- + +## Basic Usage + +### Simple Document Retrieval + +```scala +// Get document by ID +val result: ElasticResult[Option[String]] = client.get( + id = "product-123", + index = "products" +) + +result match { + case ElasticSuccess(Some(json)) => + println(s"✅ Document found: $json") + + case ElasticSuccess(None) => + println("⚠️ Document not found") + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} +``` + +--- + +### Processing Retrieved Document + +```scala +import org.json4s._ +import org.json4s.jackson.JsonMethods._ + +val result = client.get("product-123", "products") + +result match { + case ElasticSuccess(Some(jsonString)) => + val json = parse(jsonString) + val name = (json \ "name").extract[String] + val price = (json \ "price").extract[Double] + + println(s"Product: $name") + println(f"Price: $$${price}%.2f") + + case ElasticSuccess(None) => + println("Product not found") + + case ElasticFailure(error) => + println(s"Failed to retrieve product: ${error.message}") +} +``` + +--- + +### Using flatMap for Chaining + +```scala +val result = client.get("product-123", "products").flatMap { jsonOpt => + jsonOpt match { + case Some(json) => + // Process the JSON + val processed = processJson(json) + ElasticSuccess(processed) + + case None => + 
ElasticFailure(ElasticError("Document not found")) + } +} + +def processJson(json: String): String = { + // Processing logic + json.toUpperCase +} +``` + +--- + +## Typed Retrieval + +### Basic Typed Retrieval + +```scala +import org.json4s.DefaultFormats + +case class Product( + id: String, + name: String, + price: Double, + category: String, + stock: Int +) + +implicit val formats: Formats = DefaultFormats + +// Get document as typed entity +val result: ElasticResult[Option[Product]] = client.getAs[Product]( + id = "product-123", + index = Some("products") +) + +result match { + case ElasticSuccess(Some(product)) => + println(s"✅ Product: ${product.name}") + println(f" Price: $$${product.price}%.2f") + println(s" Stock: ${product.stock} units") + + case ElasticSuccess(None) => + println("⚠️ Product not found") + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} +``` + +--- + +### Automatic Index Inference + +```scala +case class Product(id: String, name: String, price: Double) + +// Index name automatically inferred from class name (lowercase) +// Will use "product" as index name +val result = client.getAs[Product](id = "123") + +result match { + case ElasticSuccess(Some(product)) => + println(s"Found product: ${product.name}") + + case ElasticSuccess(None) => + println("Product not found") + + case ElasticFailure(error) => + println(s"Error: ${error.message}") +} +``` + +--- + +### Custom Index and Type Names + +```scala +case class Product(id: String, name: String, price: Double) + +// Specify custom index name +val result1 = client.getAs[Product]( + id = "123", + index = Some("my-products-index") +) + +// Specify custom type name (affects index inference) +val result2 = client.getAs[Product]( + id = "123", + maybeType = Some("catalog-item") +) + +// Both custom index and type +val result3 = client.getAs[Product]( + id = "123", + index = Some("my-products-index"), + maybeType = Some("catalog-item") +) +``` + +--- + +### Nested Objects + +```scala +case class Address( + street: String, + city: String, + country: String, + zipCode: String +) + +case class Customer( + id: String, + name: String, + email: String, + address: Address, + orders: Seq[String] +) + +implicit val formats: Formats = DefaultFormats + +val result = client.getAs[Customer]( + id = "customer-456", + index = Some("customers") +) + +result match { + case ElasticSuccess(Some(customer)) => + println(s"Customer: ${customer.name}") + println(s"Email: ${customer.email}") + println(s"City: ${customer.address.city}") + println(s"Orders: ${customer.orders.size}") + + case ElasticSuccess(None) => + println("Customer not found") + + case ElasticFailure(error) => + println(s"Error: ${error.message}") +} +``` + +--- + +### Custom Formats + +```scala +import org.json4s._ +import org.json4s.ext.JavaTimeSerializers +import java.time.LocalDateTime + +case class Order( + id: String, + customerId: String, + total: Double, + createdAt: LocalDateTime, + status: String +) + +// Custom formats with date serializers +implicit val formats: Formats = DefaultFormats ++ JavaTimeSerializers.all + +val result = client.getAs[Order]( + id = "order-789", + index = Some("orders") +) + +result match { + case ElasticSuccess(Some(order)) => + println(s"Order ID: ${order.id}") + println(s"Created: ${order.createdAt}") + println(f"Total: $$${order.total}%.2f") + + case ElasticSuccess(None) => + println("Order not found") + + case ElasticFailure(error) => + println(s"Error: ${error.message}") +} +``` + +--- + +## Asynchronous 
Operations + +### Basic Async Retrieval + +```scala +import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.Future + +// Async get as JSON +val futureResult: Future[ElasticResult[Option[String]]] = + client.getAsync("product-123", "products") + +futureResult.foreach { + case ElasticSuccess(Some(json)) => + println(s"✅ Document retrieved: $json") + + case ElasticSuccess(None) => + println("⚠️ Document not found") + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} +``` + +--- + +### Async Typed Retrieval + +```scala +case class Product(id: String, name: String, price: Double) + +implicit val formats: Formats = DefaultFormats +implicit val ec: ExecutionContext = ExecutionContext.global + +// Async get as typed entity +val futureResult: Future[ElasticResult[Option[Product]]] = + client.getAsyncAs[Product]( + id = "product-123", + index = Some("products") + ) + +futureResult.foreach { + case ElasticSuccess(Some(product)) => + println(s"✅ Product: ${product.name}") + println(f" Price: $$${product.price}%.2f") + + case ElasticSuccess(None) => + println("⚠️ Product not found") + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} +``` + +--- + +### Chaining Async Operations + +```scala +// Chain multiple async operations +val result: Future[ElasticResult[String]] = for { + productResult <- client.getAsyncAs[Product]("product-123", Some("products")) + customerResult <- productResult match { + case ElasticSuccess(Some(product)) => + // Get related customer + client.getAsyncAs[Customer](product.customerId, Some("customers")) + case _ => + Future.successful(ElasticFailure(ElasticError("Product not found"))) + } +} yield { + customerResult match { + case ElasticSuccess(Some(customer)) => + ElasticSuccess(s"Customer: ${customer.name}") + case ElasticSuccess(None) => + ElasticFailure(ElasticError("Customer not found")) + case failure @ ElasticFailure(_) => + failure + } +} + +result.foreach { + case ElasticSuccess(message) => println(message) + case ElasticFailure(error) => println(s"Error: ${error.message}") +} +``` + +--- + +### Parallel Async Operations + +```scala +// Retrieve multiple documents in parallel +val ids = Seq("product-1", "product-2", "product-3") + +val futures: Seq[Future[ElasticResult[Option[Product]]]] = ids.map { id => + client.getAsyncAs[Product](id, Some("products")) +} + +// Wait for all to complete +Future.sequence(futures).foreach { results => + val products = results.flatMap { + case ElasticSuccess(Some(product)) => Some(product) + case _ => None + } + + println(s"Retrieved ${products.size} products:") + products.foreach { p => + println(s" - ${p.name}: $$${p.price}") + } +} +``` + +--- + +### Error Recovery + +```scala +val result = client.getAsyncAs[Product]("product-123", Some("products")) + .recover { + case ex: Exception => + println(s"Recovered from exception: ${ex.getMessage}") + ElasticFailure(ElasticError( + message = "Failed to retrieve product", + cause = Some(ex) + )) + } + +result.foreach { + case ElasticSuccess(Some(product)) => + println(s"Product: ${product.name}") + case ElasticSuccess(None) => + println("Product not found") + case ElasticFailure(error) => + println(s"Error: ${error.message}") +} +``` + +--- + +## Document Existence Check + +### Basic Existence Check + +```scala +// Check if document exists +val exists: ElasticResult[Boolean] = client.exists( + id = "product-123", + index = "products" +) + +exists match { + case ElasticSuccess(true) => + println("✅ Document exists") + 
+ case ElasticSuccess(false) => + println("⚠️ Document does not exist") + + case ElasticFailure(error) => + println(s"❌ Error checking existence: ${error.message}") +} +``` + +--- + +### Conditional Retrieval + +```scala +def getProductIfExists(id: String): ElasticResult[Option[Product]] = { + client.exists(id, "products") match { + case ElasticSuccess(true) => + // Document exists, retrieve it + client.getAs[Product](id, Some("products")) + + case ElasticSuccess(false) => + // Document doesn't exist + ElasticSuccess(None) + + case failure @ ElasticFailure(_) => + // Error checking existence + failure.asInstanceOf[ElasticResult[Option[Product]]] + } +} + +// Usage +getProductIfExists("product-123") match { + case ElasticSuccess(Some(product)) => + println(s"Product found: ${product.name}") + case ElasticSuccess(None) => + println("Product not found") + case ElasticFailure(error) => + println(s"Error: ${error.message}") +} +``` + +--- + +### Batch Existence Check + +```scala +def checkMultipleExists(ids: Seq[String], index: String): Map[String, Boolean] = { + ids.map { id => + val exists = client.exists(id, index) match { + case ElasticSuccess(value) => value + case ElasticFailure(_) => false + } + id -> exists + }.toMap +} + +// Usage +val ids = Seq("product-1", "product-2", "product-3", "product-4") +val existenceMap = checkMultipleExists(ids, "products") + +existenceMap.foreach { case (id, exists) => + if (exists) { + println(s"✅ $id exists") + } else { + println(s"❌ $id does not exist") + } +} +``` + +--- + +### Async Existence Check + +```scala +def existsAsync(id: String, index: String) + (implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = { + client.getAsync(id, index).map { + case ElasticSuccess(Some(_)) => ElasticSuccess(true) + case ElasticSuccess(None) => ElasticSuccess(false) + case failure @ ElasticFailure(_) => failure + } +} + +// Usage +existsAsync("product-123", "products").foreach { + case ElasticSuccess(true) => println("Document exists") + case ElasticSuccess(false) => println("Document does not exist") + case ElasticFailure(error) => println(s"Error: ${error.message}") +} +``` + +--- + +## Error Handling + +### Common Error Scenarios + +```scala +val result = client.get("product-123", "products") + +result match { + // Success - document found + case ElasticSuccess(Some(json)) => + println(s"✅ Document: $json") + + // Success - document not found (404) + case ElasticSuccess(None) => + println("⚠️ Document not found") + + // Failure - various error types + case ElasticFailure(error) => + error.statusCode match { + case Some(400) => + println(s"❌ Bad request: ${error.message}") + + case Some(404) => + println(s"❌ Not found: ${error.message}") + + case Some(500) => + println(s"❌ Server error: ${error.message}") + + case _ => + println(s"❌ Error: ${error.message}") + } +} +``` + +--- + +### Error Details + +```scala +val result = client.get("product-123", "invalid-index!") + +result match { + case ElasticFailure(error) => + println(s"Message: ${error.message}") + println(s"Status Code: ${error.statusCode.getOrElse("N/A")}") + println(s"Index: ${error.index.getOrElse("N/A")}") + println(s"Operation: ${error.operation.getOrElse("N/A")}") + + error.cause.foreach { throwable => + println(s"Cause: ${throwable.getMessage}") + throwable.printStackTrace() + } + + case _ => + println("No error") +} +``` + +--- + +### Validation Errors + +```scala +// Invalid index name +val result1 = client.get("doc-123", "INVALID_INDEX!") + +result1 match { + case 
ElasticFailure(error) => + println(s"Validation error: ${error.message}") + // Output: "Invalid index: index name must be lowercase..." + + case _ => () +} + +// Empty document ID +val result2 = client.get("", "products") + +result2 match { + case ElasticFailure(error) => + println(s"Validation error: ${error.message}") + + case _ => () +} +``` + +--- + +### Deserialization Errors + +```scala +case class Product(id: String, name: String, price: Double) + +implicit val formats: Formats = DefaultFormats + +// Document has incompatible structure +val result = client.getAs[Product]("invalid-doc", "products") + +result match { + case ElasticSuccess(Some(product)) => + println(s"Product: ${product.name}") + + case ElasticSuccess(None) => + println("Product not found") + + case ElasticFailure(error) => + // Deserialization error + println(s"❌ Failed to deserialize: ${error.message}") + error.cause.foreach { ex => + println(s" Cause: ${ex.getMessage}") + } +} +``` + +--- + +### Comprehensive Error Handler + +```scala +def handleGetResult[T](result: ElasticResult[Option[T]]): Option[T] = { + result match { + case ElasticSuccess(Some(value)) => + logger.info("✅ Document retrieved successfully") + Some(value) + + case ElasticSuccess(None) => + logger.warn("⚠️ Document not found") + None + + case ElasticFailure(error) => + error.statusCode match { + case Some(400) => + logger.error(s"❌ Bad request: ${error.message}") + case Some(404) => + logger.warn(s"⚠️ Not found: ${error.message}") + case Some(500) => + logger.error(s"❌ Server error: ${error.message}") + case Some(503) => + logger.error(s"❌ Service unavailable: ${error.message}") + case _ => + logger.error(s"❌ Unexpected error: ${error.message}") + } + + error.cause.foreach { throwable => + logger.error("Exception details:", throwable) + } + + None + } +} + +// Usage +val product = handleGetResult( + client.getAs[Product]("product-123", Some("products")) +) + +product match { + case Some(p) => println(s"Product: ${p.name}") + case None => println("Failed to retrieve product") +} +``` + +--- + +## Advanced Patterns + +### Caching Pattern + +```scala +import scala.collection.concurrent.TrieMap + +class CachedProductRepository { + private val cache = TrieMap.empty[String, Product] + + def getProduct(id: String): ElasticResult[Option[Product]] = { + // Check cache first + cache.get(id) match { + case Some(product) => + logger.debug(s"Cache hit for product $id") + ElasticSuccess(Some(product)) + + case None => + // Cache miss, fetch from Elasticsearch + logger.debug(s"Cache miss for product $id") + client.getAs[Product](id, Some("products")) match { + case success @ ElasticSuccess(Some(product)) => + // Store in cache + cache.put(id, product) + success + + case other => other + } + } + } + + def invalidate(id: String): Unit = { + cache.remove(id) + } + + def clear(): Unit = { + cache.clear() + } +} + +// Usage +val repo = new CachedProductRepository() + +val product1 = repo.getProduct("product-123") // Cache miss, fetches from ES +val product2 = repo.getProduct("product-123") // Cache hit, no ES call +``` + +--- + +### Fallback Pattern + +```scala +def getProductWithFallback(id: String): ElasticResult[Option[Product]] = { + // Try primary index + client.getAs[Product](id, Some("products")) match { + case success @ ElasticSuccess(Some(_)) => + success + + case ElasticSuccess(None) => + // Try fallback index + logger.warn(s"Product $id not found in primary index, trying fallback") + client.getAs[Product](id, Some("products-archive")) + + case failure @ 
ElasticFailure(_) => + failure + } +} + +// Usage +getProductWithFallback("product-123") match { + case ElasticSuccess(Some(product)) => + println(s"Product found: ${product.name}") + case ElasticSuccess(None) => + println("Product not found in any index") + case ElasticFailure(error) => + println(s"Error: ${error.message}") +} +``` + +--- + +### Batch Retrieval + +```scala +def getBatch[T <: AnyRef]( + ids: Seq[String], + index: String +)(implicit + m: Manifest[T], + formats: Formats +): Map[String, T] = { + ids.flatMap { id => + client.getAs[T](id, Some(index)) match { + case ElasticSuccess(Some(entity)) => + Some(id -> entity) + case _ => + logger.warn(s"Failed to retrieve document $id") + None + } + }.toMap +} + +// Usage +val productIds = Seq("product-1", "product-2", "product-3") +val products: Map[String, Product] = getBatch[Product](productIds, "products") + +products.foreach { case (id, product) => + println(s"$id: ${product.name}") +} +``` + +--- + +### Async Batch Retrieval + +```scala +def getBatchAsync[T <: AnyRef]( + ids: Seq[String], + index: String +)(implicit + m: Manifest[T], + ec: ExecutionContext, + formats: Formats +): Future[Map[String, T]] = { + val futures = ids.map { id => + client.getAsyncAs[T](id, Some(index)).map { result => + result match { + case ElasticSuccess(Some(entity)) => Some(id -> entity) + case _ => None + } + } + } + + Future.sequence(futures).map(_.flatten.toMap) +} + +// Usage +getBatchAsync[Product]( + Seq("product-1", "product-2", "product-3"), + "products" +).foreach { products => + println(s"Retrieved ${products.size} products") + products.foreach { case (id, product) => + println(s" $id: ${product.name}") + } +} +``` + +--- + +### Retry Pattern + +```scala +import scala.annotation.tailrec +import scala.concurrent.duration._ + +@tailrec +def getWithRetry[T <: AnyRef]( + id: String, + index: String, + maxRetries: Int = 3, + retryDelay: Duration = 1.second +)(implicit + m: Manifest[T], + formats: Formats +): ElasticResult[Option[T]] = { + client.getAs[T](id, Some(index)) match { + case success @ ElasticSuccess(_) => + success + + case failure @ ElasticFailure(error) if maxRetries > 0 => + error.statusCode match { + case Some(500) | Some(503) => + // Retry on server errors + logger.warn(s"Retrying after ${retryDelay.toMillis}ms (${maxRetries} retries left)") + Thread.sleep(retryDelay.toMillis) + getWithRetry(id, index, maxRetries - 1, retryDelay) + + case _ => + // Don't retry on client errors + failure + } + + case failure => + failure + } +} + +// Usage +val product = getWithRetry[Product]("product-123", "products") +``` + +--- + +### Validation and Transformation + +```scala +def getAndValidate[T <: AnyRef]( + id: String, + index: String, + validate: T => Either[String, T] +)(implicit + m: Manifest[T], + formats: Formats +): ElasticResult[Option[T]] = { + client.getAs[T](id, Some(index)).flatMap { + case Some(entity) => + validate(entity) match { + case Right(validEntity) => + ElasticSuccess(Some(validEntity)) + case Left(error) => + ElasticFailure(ElasticError( + message = s"Validation failed: $error", + index = Some(index), + operation = Some("getAndValidate") + )) + } + case None => + ElasticSuccess(None) + } +} + +// Usage +def validateProduct(product: Product): Either[String, Product] = { + if (product.price < 0) { + Left("Price cannot be negative") + } else if (product.name.isEmpty) { + Left("Name cannot be empty") + } else { + Right(product) + } +} + +getAndValidate[Product]("product-123", "products", validateProduct) match { + case 
ElasticSuccess(Some(product)) =>
+    println(s"Valid product: ${product.name}")
+  case ElasticSuccess(None) =>
+    println("Product not found")
+  case ElasticFailure(error) =>
+    println(s"Error: ${error.message}")
+}
+```
+
+---
+
+## Testing
+
+### Test Basic Get
+
+```scala
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class GetApiSpec extends AnyFlatSpec with Matchers {
+
+  "GetApi" should "retrieve existing document" in {
+    val testIndex = "test-get"
+    val docId = "test-doc-1"
+    val docContent = """{"id":"test-doc-1","name":"Test Product","price":99.99}"""
+
+    // Setup
+    client.createIndex(testIndex)
+    client.index(testIndex, docId, docContent)
+    client.refresh(testIndex)
+
+    // Test
+    val result = client.get(docId, testIndex)
+
+    // Assertions
+    result match {
+      case ElasticSuccess(Some(json)) =>
+        json should include("Test Product")
+        json should include("99.99")
+
+      case other =>
+        fail(s"Expected success, got: $other")
+    }
+
+    // Cleanup
+    client.deleteIndex(testIndex)
+  }
+
+  it should "return None for non-existent document" in {
+    val testIndex = "test-get"
+
+    // Setup
+    client.createIndex(testIndex)
+
+    // Test
+    val result = client.get("non-existent-id", testIndex)
+
+    // Assertions
+    result match {
+      case ElasticSuccess(None) =>
+        // Expected
+        succeed
+
+      case other =>
+        fail(s"Expected None, got: $other")
+    }
+
+    // Cleanup
+    client.deleteIndex(testIndex)
+  }
+}
+```
+
+---
+
+### Test Typed Get
+
+```scala
+"GetApi" should "retrieve and deserialize document" in {
+  case class TestProduct(id: String, name: String, price: Double)
+  implicit val formats: Formats = DefaultFormats
+
+  val testIndex = "test-typed-get"
+  val docId = "product-1"
+  val docContent = """{"id":"product-1","name":"Laptop","price":999.99}"""
+
+  // Setup
+  client.createIndex(testIndex)
+  client.index(testIndex, docId, docContent)
+  client.refresh(testIndex)
+
+  // Test
+  val result = client.getAs[TestProduct](docId, Some(testIndex))
+
+  // Assertions
+  result match {
+    case ElasticSuccess(Some(product)) =>
+      product.id shouldBe "product-1"
+      product.name shouldBe "Laptop"
+      product.price shouldBe 999.99
+
+    case other =>
+      fail(s"Expected success, got: $other")
+  }
+
+  // Cleanup
+  client.deleteIndex(testIndex)
+}
+```
+
+---
+
+### Test Existence Check
+
+```scala
+"GetApi" should "check document existence correctly" in {
+  val testIndex = "test-exists"
+  val docId = "test-doc-1"
+  val docContent = """{"id":"test-doc-1","value":"test"}"""
+
+  // Setup
+  client.createIndex(testIndex)
+  client.index(testIndex, docId, docContent)
+  client.refresh(testIndex)
+
+  // Test - document exists
+  val existsResult = client.exists(docId, testIndex)
+  existsResult match {
+    case ElasticSuccess(true) => succeed
+    case other => fail(s"Expected true, got: $other")
+  }
+
+  // Test - document doesn't exist
+  val notExistsResult = client.exists("non-existent-id", testIndex)
+  notExistsResult match {
+    case ElasticSuccess(false) => succeed
+    case other => fail(s"Expected false, got: $other")
+  }
+
+  // Cleanup
+  client.deleteIndex(testIndex)
+}
+```
+
+---
+
+### Test Async Operations
+
+```scala
+"GetApi" should "retrieve document asynchronously" in {
+  implicit val ec: ExecutionContext = ExecutionContext.global
+
+  val testIndex = "test-async-get"
+  val docId = "async-doc-1"
+  val docContent = """{"id":"async-doc-1","name":"Async Product","price":149.99}"""
+
+  // Setup
+  client.createIndex(testIndex)
+  client.index(testIndex, docId, docContent)
+  client.refresh(testIndex)
+
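+  // NOTE: AnyFlatSpec does not await a returned Future; for async tests like
+  // this one, AsyncFlatSpec (which expects Future[Assertion]) completes the
+  // test only when the Future does.
+
+  // 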
Test + val futureResult = client.getAsync(docId, testIndex) + + futureResult.map { + case ElasticSuccess(Some(json)) => + json should include("Async Product") + json should include("149.99") + + case other => + fail(s"Expected success, got: $other") + }.flatMap { _ => + // Cleanup + Future.successful(client.deleteIndex(testIndex)) + } +} +``` + +--- + +### Test Async Typed Get + +```scala +"GetApi" should "retrieve and deserialize document asynchronously" in { + case class AsyncProduct(id: String, name: String, price: Double) + implicit val formats: Formats = DefaultFormats + implicit val ec: ExecutionContext = ExecutionContext.global + + val testIndex = "test-async-typed" + val docId = "product-1" + val docContent = """{"id":"product-1","name":"Wireless Mouse","price":29.99}""" + + // Setup + client.createIndex(testIndex) + client.index(testIndex, docId, docContent) + client.refresh(testIndex) + + // Test + val futureResult = client.getAsyncAs[AsyncProduct](docId, Some(testIndex)) + + futureResult.map { + case ElasticSuccess(Some(product)) => + product.id shouldBe "product-1" + product.name shouldBe "Wireless Mouse" + product.price shouldBe 29.99 + + case other => + fail(s"Expected success, got: $other") + }.flatMap { _ => + // Cleanup + Future.successful(client.deleteIndex(testIndex)) + } +} +``` + +--- + +### Test Error Handling + +```scala +"GetApi" should "handle invalid index name" in { + val result = client.get("doc-1", "INVALID_INDEX!") + + result match { + case ElasticFailure(error) => + error.statusCode shouldBe Some(400) + error.message should include("Invalid index") + error.operation shouldBe Some("get") + + case other => + fail(s"Expected failure, got: $other") + } +} + +it should "handle deserialization errors" in { + case class StrictProduct(id: String, name: String, price: Double, requiredField: String) + implicit val formats: Formats = DefaultFormats + + val testIndex = "test-deser-error" + val docId = "incomplete-doc" + // Missing requiredField + val docContent = """{"id":"incomplete-doc","name":"Product","price":99.99}""" + + // Setup + client.createIndex(testIndex) + client.index(testIndex, docId, docContent) + client.refresh(testIndex) + + // Test + val result = client.getAs[StrictProduct](docId, Some(testIndex)) + + result match { + case ElasticFailure(error) => + error.message should include("Failed to retrieve") + + case other => + fail(s"Expected failure, got: $other") + } + + // Cleanup + client.deleteIndex(testIndex) +} +``` + +--- + +### Test Nested Objects + +```scala +"GetApi" should "handle nested objects" in { + case class Address(street: String, city: String, zipCode: String) + case class Customer(id: String, name: String, email: String, address: Address) + + implicit val formats: Formats = DefaultFormats + + val testIndex = "test-nested" + val docId = "customer-1" + val docContent = """{ + "id": "customer-1", + "name": "John Doe", + "email": "john@example.com", + "address": { + "street": "123 Main St", + "city": "New York", + "zipCode": "10001" + } + }""" + + // Setup + client.createIndex(testIndex) + client.index(testIndex, docId, docContent) + client.refresh(testIndex) + + // Test + val result = client.getAs[Customer](docId, Some(testIndex)) + + result match { + case ElasticSuccess(Some(customer)) => + customer.id shouldBe "customer-1" + customer.name shouldBe "John Doe" + customer.email shouldBe "john@example.com" + customer.address.street shouldBe "123 Main St" + customer.address.city shouldBe "New York" + customer.address.zipCode shouldBe "10001" + + 
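// Any other outcome (missing document or failure) fails the test
+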
case other => + fail(s"Expected success, got: $other") + } + + // Cleanup + client.deleteIndex(testIndex) +} +``` + +--- + +### Test Batch Operations + +```scala +"GetApi" should "retrieve multiple documents" in { + case class TestDoc(id: String, value: Int) + implicit val formats: Formats = DefaultFormats + + val testIndex = "test-batch" + + // Setup + client.createIndex(testIndex) + (1 to 5).foreach { i => + client.index(testIndex, s"doc-$i", s"""{"id":"doc-$i","value":$i}""") + } + client.refresh(testIndex) + + // Test - retrieve all documents + val results = (1 to 5).map { i => + client.getAs[TestDoc](s"doc-$i", Some(testIndex)) + } + + // Assertions + results.foreach { + case ElasticSuccess(Some(doc)) => + doc.value should be > 0 + doc.value should be <= 5 + + case other => + fail(s"Expected success, got: $other") + } + + // Cleanup + client.deleteIndex(testIndex) +} +``` + +--- + +### Test Index Inference + +```scala +"GetApi" should "infer index name from type" in { + case class Product(id: String, name: String) + implicit val formats: Formats = DefaultFormats + + val testIndex = "product" // Lowercase class name + val docId = "product-1" + val docContent = """{"id":"product-1","name":"Test Product"}""" + + // Setup + client.createIndex(testIndex) + client.index(testIndex, docId, docContent) + client.refresh(testIndex) + + // Test - without explicit index (should infer "product") + val result = client.getAs[Product](docId) + + result match { + case ElasticSuccess(Some(product)) => + product.id shouldBe "product-1" + product.name shouldBe "Test Product" + + case other => + fail(s"Expected success, got: $other") + } + + // Cleanup + client.deleteIndex(testIndex) +} +``` + +--- + +## Best Practices + +### 1. Always Handle All Result Cases + +```scala +// ❌ BAD: Only handling success case +val result = client.get("doc-1", "products") +val json = result match { + case ElasticSuccess(Some(json)) => json +} +// Throws MatchError if document not found or error occurs + +// ✅ GOOD: Handle all cases +val result = client.get("doc-1", "products") +val json = result match { + case ElasticSuccess(Some(json)) => + Some(json) + case ElasticSuccess(None) => + logger.warn("Document not found") + None + case ElasticFailure(error) => + logger.error(s"Error: ${error.message}") + None +} +``` + +--- + +### 2. Use Typed Retrieval When Possible + +```scala +case class Product(id: String, name: String, price: Double) +implicit val formats: Formats = DefaultFormats + +// ❌ BAD: Manual JSON parsing +val result = client.get("product-1", "products") +val product = result match { + case ElasticSuccess(Some(json)) => + val parsed = parse(json) + Product( + (parsed \ "id").extract[String], + (parsed \ "name").extract[String], + (parsed \ "price").extract[Double] + ) + case _ => null +} + +// ✅ GOOD: Automatic deserialization +val result = client.getAs[Product]("product-1", Some("products")) +val product = result match { + case ElasticSuccess(Some(p)) => Some(p) + case _ => None +} +``` + +--- + +### 3. 
Use Async for Multiple Retrievals + +```scala +implicit val ec: ExecutionContext = ExecutionContext.global + +// ❌ BAD: Sequential synchronous calls +val product1 = client.getAs[Product]("product-1", Some("products")) +val product2 = client.getAs[Product]("product-2", Some("products")) +val product3 = client.getAs[Product]("product-3", Some("products")) +// Total time: T1 + T2 + T3 + +// ✅ GOOD: Parallel async calls +val futures = Seq("product-1", "product-2", "product-3").map { id => + client.getAsyncAs[Product](id, Some("products")) +} +Future.sequence(futures) +// Total time: max(T1, T2, T3) +``` + +--- + +### 4. Validate Index Names + +```scala +// ✅ GOOD: Index validation is automatic +val result = client.get("doc-1", "INVALID_INDEX!") +result match { + case ElasticFailure(error) => + // Error is caught with statusCode 400 + println(s"Validation error: ${error.message}") + case _ => () +} +``` + +--- + +### 5. Use exists() for Existence Checks + +```scala +// ❌ BAD: Using get() just to check existence +val exists = client.get("doc-1", "products") match { + case ElasticSuccess(Some(_)) => true + case _ => false +} + +// ✅ GOOD: Use dedicated exists() method +val exists = client.exists("doc-1", "products") match { + case ElasticSuccess(value) => value + case ElasticFailure(_) => false +} +``` + +--- + +### 6. Handle Deserialization Errors + +```scala +case class Product(id: String, name: String, price: Double) +implicit val formats: Formats = DefaultFormats + +// ✅ GOOD: Handle deserialization errors +val result = client.getAs[Product]("product-1", Some("products")) +result match { + case ElasticSuccess(Some(product)) => + println(s"Product: ${product.name}") + + case ElasticSuccess(None) => + println("Product not found") + + case ElasticFailure(error) => + // Could be network error OR deserialization error + error.cause match { + case Some(ex: org.json4s.MappingException) => + println(s"Deserialization error: ${ex.getMessage}") + case _ => + println(s"Error: ${error.message}") + } +} +``` + +--- + +### 7. Use Custom Formats for Complex Types + +```scala +import org.json4s.ext.JavaTimeSerializers +import java.time.LocalDateTime + +case class Order( + id: String, + total: Double, + createdAt: LocalDateTime +) + +// ✅ GOOD: Include necessary serializers +implicit val formats: Formats = DefaultFormats ++ JavaTimeSerializers.all + +val result = client.getAs[Order]("order-1", Some("orders")) +``` + +--- + +### 8. Log Operations for Debugging + +```scala +// ✅ GOOD: Logging is built-in +val result = client.get("product-1", "products") +// Automatically logs: +// DEBUG: Getting document with id 'product-1' from index 'products' +// INFO: ✅ Successfully retrieved document with id 'product-1' from index 'products' +// or +// ERROR: ❌ Failed to retrieve document with id 'product-1' from index 'products': +``` + +--- + +### 9. Use Index Inference Wisely + +```scala +case class Product(id: String, name: String) +implicit val formats: Formats = DefaultFormats + +// ✅ GOOD: Use inference for simple cases +val result1 = client.getAs[Product]("product-1") +// Uses "product" as index name + +// ✅ GOOD: Be explicit for complex cases +val result2 = client.getAs[Product]( + "product-1", + index = Some("products-v2") // Explicit index +) +``` + +--- + +### 10. 
Implement Retry Logic for Transient Errors + +```scala +// ✅ GOOD: Retry on server errors +def getWithRetry[T <: AnyRef]( + id: String, + index: String, + maxRetries: Int = 3 +)(implicit + m: Manifest[T], + formats: Formats +): ElasticResult[Option[T]] = { + + @tailrec + def retry(attemptsLeft: Int): ElasticResult[Option[T]] = { + client.getAs[T](id, Some(index)) match { + case success @ ElasticSuccess(_) => + success + + case failure @ ElasticFailure(error) => + error.statusCode match { + case Some(500 | 503) if attemptsLeft > 0 => + logger.warn(s"Retrying... ($attemptsLeft attempts left)") + Thread.sleep(1000) + retry(attemptsLeft - 1) + + case _ => + failure + } + } + } + + retry(maxRetries) +} +``` + +--- + +### 11. Use Pattern Matching for Clean Code + +```scala +// ✅ GOOD: Clean pattern matching +def processProduct(id: String): Unit = { + client.getAs[Product](id, Some("products")) match { + case ElasticSuccess(Some(product)) => + println(s"Processing: ${product.name}") + // Process product + + case ElasticSuccess(None) => + println(s"Product $id not found") + + case ElasticFailure(error) => + error.statusCode match { + case Some(404) => println("Not found") + case Some(500) => println("Server error") + case _ => println(s"Error: ${error.message}") + } + } +} +``` + +--- + +### 12. Test Edge Cases + +```scala +// ✅ GOOD: Test various scenarios +class GetApiEdgeCasesSpec extends AnyFlatSpec with Matchers { + + "GetApi" should "handle empty document ID" in { + val result = client.get("", "products") + result shouldBe a[ElasticFailure] + } + + it should "handle very long document ID" in { + val longId = "a" * 1000 + val result = client.get(longId, "products") + // Should handle gracefully + } + + it should "handle special characters in ID" in { + val specialId = "product-123!@#$%" + val result = client.get(specialId, "products") + // Should handle gracefully + } + + it should "handle non-existent index" in { + val result = client.get("doc-1", "non-existent-index") + result match { + case ElasticFailure(error) => + error.statusCode should (be(Some(404)) or be(Some(400))) + case _ => () + } + } +} +``` + +--- + +## Common Patterns + +### Pattern 1: Get or Create Default + +```scala +def getOrDefault[T <: AnyRef]( + id: String, + index: String, + default: => T +)(implicit + m: Manifest[T], + formats: Formats +): T = { + client.getAs[T](id, Some(index)) match { + case ElasticSuccess(Some(entity)) => entity + case _ => default + } +} + +// Usage +val product = getOrDefault[Product]( + "product-123", + "products", + Product("product-123", "Default Product", 0.0) +) +``` + +--- + +### Pattern 2: Get with Transformation + +```scala +def getAndTransform[T <: AnyRef, U]( + id: String, + index: String, + transform: T => U +)(implicit + m: Manifest[T], + formats: Formats +): ElasticResult[Option[U]] = { + client.getAs[T](id, Some(index)).map { + case Some(entity) => Some(transform(entity)) + case None => None + } +} + +// Usage +val productSummary = getAndTransform[Product, String]( + "product-123", + "products", + p => s"${p.name}: $${p.price}" +) +``` + +--- + +### Pattern 3: Get with Validation + +```scala +def getValidated[T <: AnyRef]( + id: String, + index: String, + validate: T => Boolean +)(implicit + m: Manifest[T], + formats: Formats +): ElasticResult[Option[T]] = { + client.getAs[T](id, Some(index)).flatMap { + case Some(entity) if validate(entity) => + ElasticSuccess(Some(entity)) + + case Some(_) => + ElasticFailure(ElasticError( + message = s"Validation failed for document $id", + 
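+          // Record the index name so the failure is easy to trace in logs
+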
index = Some(index) + )) + + case None => + ElasticSuccess(None) + } +} + +// Usage +val validProduct = getValidated[Product]( + "product-123", + "products", + p => p.price > 0 && p.name.nonEmpty +) +``` + +--- + +### Pattern 4: Repository Pattern + +```scala +class ProductRepository(client: ElasticClient) { + private val indexName = "products" + + implicit val formats: Formats = DefaultFormats + + def findById(id: String): Option[Product] = { + client.getAs[Product](id, Some(indexName)) match { + case ElasticSuccess(result) => result + case ElasticFailure(error) => + logger.error(s"Failed to find product $id: ${error.message}") + None + } + } + + def exists(id: String): Boolean = { + client.exists(id, indexName) match { + case ElasticSuccess(result) => result + case ElasticFailure(_) => false + } + } + + def findByIdAsync(id: String) + (implicit ec: ExecutionContext): Future[Option[Product]] = { + client.getAsyncAs[Product](id, Some(indexName)).map { + case ElasticSuccess(result) => result + case ElasticFailure(error) => + logger.error(s"Failed to find product $id: ${error.message}") + None + } + } +} + +// Usage +val repo = new ProductRepository(client) +val product = repo.findById("product-123") +``` + +--- + +### Pattern 5: Caching with TTL + +```scala +import scala.concurrent.duration._ + +case class CacheEntry[T](value: T, expiresAt: Long) + +class CachedRepository[T <: AnyRef]( + client: ElasticClient, + index: String, + ttl: Duration = 5.minutes +)(implicit m: Manifest[T], formats: Formats) { + + private val cache = TrieMap.empty[String, CacheEntry[T]] + + def get(id: String): Option[T] = { + val now = System.currentTimeMillis() + + cache.get(id) match { + case Some(entry) if entry.expiresAt > now => + // Cache hit + Some(entry.value) + + case _ => + // Cache miss or expired + client.getAs[T](id, Some(index)) match { + case ElasticSuccess(Some(entity)) => + val expiresAt = now + ttl.toMillis + cache.put(id, CacheEntry(entity, expiresAt)) + Some(entity) + + case _ => + cache.remove(id) + None + } + } + } + + def invalidate(id: String): Unit = { + cache.remove(id) + } +} + +// Usage +val repo = new CachedRepository[Product](client, "products", 10.minutes) +val product = repo.get("product-123") +``` + +--- + +## Summary + +The **Get API** provides: + +✅ **Simple document retrieval** by ID +✅ **Type-safe deserialization** with automatic conversion +✅ **Existence checking** without full retrieval +✅ **Async operations** for better performance +✅ **Comprehensive error handling** with detailed errors +✅ **Automatic validation** of index names +✅ **Index name inference** from entity types + +**Method Selection Guide:** + +| Use Case | Recommended Method | +|--------------------------|--------------------------------------| +| Check if document exists | `exists()` | +| Get raw JSON | `get()` | +| Get typed entity | `getAs[T]()` | +| Get multiple documents | `getAsync()` + `Future.sequence()` | +| Get with custom index | `getAs[T](id, Some("custom-index"))` | +| Get with type inference | `getAs[T](id)` | + +**Best Practices:** + +1. ✅ Always handle all result cases (Success/None/Failure) +2. ✅ Use typed retrieval (`getAs`) when possible +3. ✅ Use async methods for multiple retrievals +4. ✅ Use `exists()` for existence checks only +5. ✅ Handle deserialization errors explicitly +6. ✅ Use custom formats for complex types +7. ✅ Implement retry logic for transient errors +8. ✅ Use repository pattern for cleaner code +9. ✅ Test edge cases (empty IDs, invalid indexes) +10. 
✅ Add caching for frequently accessed documents + +**Error Handling:** + +- **400**: Invalid index name or parameters +- **404**: Document or index not found +- **500**: Server error (consider retry) +- **503**: Service unavailable (consider retry) + +--- + +[Back to index](README.md) | [Next: Search Documents](search.md) \ No newline at end of file diff --git a/documentation/client/index.md b/documentation/client/index.md new file mode 100644 index 00000000..64f588f6 --- /dev/null +++ b/documentation/client/index.md @@ -0,0 +1,849 @@ +[Back to index](README.md) + +# INDEX API + +## Overview + +The **IndexApi** trait provides functionality to index documents into Elasticsearch, supporting both synchronous and asynchronous operations with automatic serialization. + +**Features:** +- Synchronous and asynchronous indexing +- Automatic JSON serialization from Scala objects +- Type-safe indexing with implicit serialization +- Automatic index refresh after indexing +- Comprehensive validation and error handling +- Support for custom index names and document IDs + +**Dependencies:** +- Requires `RefreshApi` for automatic refresh after indexing +- Requires `SerializationApi` for JSON serialization + +--- + +## Public Methods + +### indexAs + +Indexes a Scala object into Elasticsearch with automatic JSON serialization. + +**Signature:** + +```scala +def indexAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String] = None, + maybeType: Option[String] = None +)(implicit u: ClassTag[U], formats: Formats): ElasticResult[Boolean] +``` + +**Parameters:** +- `entity` - The Scala object to index +- `id` - The document ID +- `index` - Optional index name (defaults to entity type name in lowercase) +- `maybeType` - Optional type name (defaults to entity class name in lowercase) +- `u` - Implicit ClassTag for type information +- `formats` - Implicit JSON serialization formats + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if indexed successfully +- `ElasticFailure` with error details + +**Behavior:** +- Automatically serializes entity to JSON +- Defaults index name to entity class name if not provided +- Automatically refreshes index after successful indexing + +**Examples:** + +```scala +// Domain models +case class Product(name: String, price: Double, category: String) +case class User(username: String, email: String, age: Int) + +implicit val formats: Formats = DefaultFormats + +// Basic indexing with auto-generated index name +val product = Product("Laptop", 999.99, "Electronics") +client.indexAs(product, id = "prod-001") match { + case ElasticSuccess(true) => println("Product indexed") + case ElasticFailure(e) => println(s"Error: ${e.message}") +} +// Indexes to "product" index (class name lowercased) + +// Explicit index name +val user = User("john_doe", "john@example.com", 30) +client.indexAs( + entity = user, + id = "user-123", + index = Some("users-v2") +) +// Indexes to "users-v2" index + +// Custom type name +client.indexAs( + entity = product, + id = "prod-002", + index = Some("catalog"), + maybeType = Some("electronics") +) + +// Index multiple documents +val products = List( + Product("Phone", 699.99, "Electronics"), + Product("Tablet", 499.99, "Electronics"), + Product("Headphones", 199.99, "Audio") +) + +products.zipWithIndex.foreach { case (product, idx) => + client.indexAs(product, id = s"prod-${idx + 1}", index = Some("products")) +} + +// With custom serialization formats +import org.json4s.ext.JavaTimeSerializers + +case class Event(name: String, timestamp: 
java.time.Instant) + +implicit val customFormats: Formats = DefaultFormats ++ JavaTimeSerializers.all + +val event = Event("UserLogin", java.time.Instant.now()) +client.indexAs(event, id = "evt-001", index = Some("events")) + +// Error handling with pattern matching +client.indexAs(product, id = "prod-001") match { + case ElasticSuccess(true) => + println("✅ Document indexed and searchable") + case ElasticSuccess(false) => + println("⚠️ Document not indexed") + case ElasticFailure(error) => + println(s"❌ Indexing failed: ${error.message}") + error.cause.foreach(ex => println(s"Cause: ${ex.getMessage}")) +} + +// Monadic composition +def indexWithValidation[T <: AnyRef]( + entity: T, + id: String +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[Boolean] = { + for { + validated <- validateEntity(entity) + indexed <- client.indexAs(validated, id) + } yield indexed +} +``` + +--- + +### index + +Indexes a document into Elasticsearch using a raw JSON string. + +**Signature:** + +```scala +def index(index: String, id: String, source: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - The index name +- `id` - The document ID +- `source` - The document as a JSON string + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if indexed successfully +- `ElasticFailure` with error details (400 for validation errors) + +**Validation:** +- Index name format validation + +**Behavior:** +- Automatically refreshes index after successful indexing +- Creates or updates document (upsert behavior) + +**Examples:** + +```scala +// Basic indexing with JSON string +val json = """ +{ + "name": "Laptop", + "price": 999.99, + "category": "Electronics" +} +""" +client.index("products", "prod-001", json) + +// Compact JSON +val compactJson = """{"title":"Elasticsearch Guide","author":"John Doe"}""" +client.index("books", "book-001", compactJson) + +// Dynamic JSON generation +def createUserJson(username: String, email: String): String = { + s""" + { + "username": "$username", + "email": "$email", + "created_at": "${java.time.Instant.now()}" + } + """ +} + +client.index("users", "user-123", createUserJson("john_doe", "john@example.com")) + +// Index with JSON library +import org.json4s.jackson.JsonMethods._ + +val data = Map( + "name" -> "Product Name", + "price" -> 99.99, + "tags" -> List("electronics", "sale") +) +val jsonString = compact(render(decompose(data))) +client.index("products", "prod-002", jsonString) + +// Batch indexing with JSON +val documents = List( + ("doc-1", """{"field": "value1"}"""), + ("doc-2", """{"field": "value2"}"""), + ("doc-3", """{"field": "value3"}""") +) + +documents.foreach { case (id, json) => + client.index("my-index", id, json) +} + +// Update existing document +val updatedJson = """ +{ + "name": "Updated Product", + "price": 899.99, + "category": "Electronics", + "updated_at": "2024-01-15T10:30:00Z" +} +""" +client.index("products", "prod-001", updatedJson) // Overwrites existing + +// Index with validation +def safeIndex(index: String, id: String, json: String): ElasticResult[Boolean] = { + // Validate JSON before indexing + ElasticResult.attempt(parse(json)).flatMap { _ => + client.index(index, id, json) + } +} + +// Error handling +client.index("products", "prod-001", json) match { + case ElasticSuccess(true) => + println("✅ Document indexed and refreshed") + case ElasticFailure(error) if error.statusCode.contains(400) => + println(s"❌ Validation error: ${error.message}") + case ElasticFailure(error) => + println(s"❌ Indexing error: 
${error.message}") +} +``` + +--- + +### indexAsyncAs + +Asynchronously indexes a Scala object with automatic JSON serialization. + +**Signature:** + +```scala +def indexAsyncAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String] = None, + maybeType: Option[String] = None +)(implicit + u: ClassTag[U], + ec: ExecutionContext, + formats: Formats +): Future[ElasticResult[Boolean]] +``` + +**Parameters:** +- `entity` - The Scala object to index +- `id` - The document ID +- `index` - Optional index name +- `maybeType` - Optional type name +- `u` - Implicit ClassTag +- `ec` - Implicit ExecutionContext for async execution +- `formats` - Implicit JSON serialization formats + +**Returns:** +- `Future[ElasticResult[Boolean]]` that completes when indexing finishes + +**Examples:** + +```scala +import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.Future + +case class Product(name: String, price: Double) +implicit val formats: Formats = DefaultFormats + +// Basic async indexing +val product = Product("Laptop", 999.99) +val futureResult: Future[ElasticResult[Boolean]] = + client.indexAsyncAs(product, id = "prod-001") + +futureResult.onComplete { + case Success(ElasticSuccess(true)) => + println("✅ Product indexed asynchronously") + case Success(ElasticFailure(error)) => + println(s"❌ Indexing failed: ${error.message}") + case Failure(exception) => + println(s"❌ Future failed: ${exception.getMessage}") +} + +// Await result (for testing) +import scala.concurrent.Await +import scala.concurrent.duration._ + +val result = Await.result( + client.indexAsyncAs(product, id = "prod-001"), + 5.seconds +) + +// Batch async indexing +val products = List( + Product("Phone", 699.99), + Product("Tablet", 499.99), + Product("Laptop", 999.99) +) + +val futures: List[Future[ElasticResult[Boolean]]] = + products.zipWithIndex.map { case (product, idx) => + client.indexAsyncAs(product, id = s"prod-${idx + 1}") + } + +Future.sequence(futures).onComplete { + case Success(results) => + val successful = results.count { + case ElasticSuccess(true) => true + case _ => false + } + println(s"✅ Indexed $successful/${results.length} documents") + case Failure(exception) => + println(s"❌ Batch indexing failed: ${exception.getMessage}") +} + +// Parallel indexing with rate limiting +def indexWithRateLimit[T <: AnyRef]( + entities: List[(T, String)], + maxConcurrent: Int = 10 +)(implicit ct: ClassTag[T], ec: ExecutionContext, formats: Formats): Future[List[ElasticResult[Boolean]]] = { + + entities.grouped(maxConcurrent).foldLeft(Future.successful(List.empty[ElasticResult[Boolean]])) { + case (accFuture, batch) => + accFuture.flatMap { acc => + val batchFutures = batch.map { case (entity, id) => + client.indexAsyncAs(entity, id) + } + Future.sequence(batchFutures).map(acc ++ _) + } + } +} + +// Non-blocking pipeline +def processAndIndex(data: List[RawData]): Future[Int] = { + val processedFuture = Future { + data.map(transform) + } + + processedFuture.flatMap { processed => + val indexFutures = processed.map { entity => + client.indexAsyncAs(entity, id = entity.id) + } + + Future.sequence(indexFutures).map { results => + results.count { + case ElasticSuccess(true) => true + case _ => false + } + } + } +} + +// Error recovery +def indexWithRetry[T <: AnyRef]( + entity: T, + id: String, + maxRetries: Int = 3 +)(implicit ct: ClassTag[T], ec: ExecutionContext, formats: Formats): Future[ElasticResult[Boolean]] = { + + def attempt(remaining: Int): Future[ElasticResult[Boolean]] = { + 
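// Try the call once; on failure, recurse until attempts are exhausted
+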
client.indexAsyncAs(entity, id).flatMap { + case success @ ElasticSuccess(true) => Future.successful(success) + case failure if remaining > 0 => + Thread.sleep(1000) // Backoff + attempt(remaining - 1) + case failure => Future.successful(failure) + } + } + + attempt(maxRetries) +} +``` + +--- + +### indexAsync + +Asynchronously indexes a document using a raw JSON string. + +**Signature:** + +```scala +def indexAsync( + index: String, + id: String, + source: String +)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] +``` + +**Parameters:** +- `index` - The index name +- `id` - The document ID +- `source` - The document as a JSON string +- `ec` - Implicit ExecutionContext + +**Returns:** +- `Future[ElasticResult[Boolean]]` that completes when indexing finishes + +**Validation:** +- Index name format validation (performed synchronously before async execution) + +**Examples:** + +```scala +import scala.concurrent.ExecutionContext.Implicits.global + +// Basic async indexing +val json = """{"name": "Product", "price": 99.99}""" +val future = client.indexAsync("products", "prod-001", json) + +future.onComplete { + case Success(ElasticSuccess(true)) => + println("✅ Document indexed") + case Success(ElasticFailure(error)) => + println(s"❌ Error: ${error.message}") + case Failure(ex) => + println(s"❌ Future failed: ${ex.getMessage}") +} + +// Batch async indexing +val documents = List( + ("doc-1", """{"field": "value1"}"""), + ("doc-2", """{"field": "value2"}"""), + ("doc-3", """{"field": "value3"}""") +) + +val futures = documents.map { case (id, json) => + client.indexAsync("my-index", id, json) +} + +Future.sequence(futures).map { results => + val successCount = results.count { + case ElasticSuccess(true) => true + case _ => false + } + println(s"Indexed $successCount/${results.length} documents") +} + +// Streaming indexing +import akka.stream.scaladsl._ + +def streamIndex( + index: String, + source: Source[(String, String), _] +): Future[Int] = { + source + .mapAsync(parallelism = 10) { case (id, json) => + client.indexAsync(index, id, json) + } + .runFold(0) { (count, result) => + result match { + case ElasticSuccess(true) => count + 1 + case _ => count + } + } +} + +// Dynamic content generation +def generateAndIndex( + index: String, + ids: List[String] +): Future[List[ElasticResult[Boolean]]] = { + val futures = ids.map { id => + val json = generateDynamicContent(id) + client.indexAsync(index, id, json) + } + Future.sequence(futures) +} + +// Chained async operations +def fetchAndIndex( + externalId: String +): Future[ElasticResult[Boolean]] = { + for { + data <- fetchFromExternalAPI(externalId) + json = convertToJson(data) + result <- client.indexAsync("my-index", externalId, json) + } yield result +} + +// Error handling with recovery +client.indexAsync("products", "prod-001", json) + .recover { + case ex: Exception => + ElasticFailure(ElasticError( + message = s"Indexing failed: ${ex.getMessage}", + cause = Some(ex) + )) + } + .foreach { + case ElasticSuccess(true) => println("Success") + case ElasticFailure(e) => println(s"Failed: ${e.message}") + } +``` + +--- + +## Implementation Requirements + +### executeIndex + +```scala +private[client] def executeIndex( + index: String, + id: String, + source: String +): ElasticResult[Boolean] +``` + +**Implementation Example:** + +```scala +private[client] def executeIndex( + index: String, + id: String, + source: String +): ElasticResult[Boolean] = { + executeRestAction[IndexResponse, Boolean]( + operation = "index", + index = 
Some(index) + )( + action = { + val request = new IndexRequest(index) + .id(id) + .source(source, XContentType.JSON) + client.index(request, RequestOptions.DEFAULT) + } + )( + transformer = resp => { + resp.getResult == DocWriteResponse.Result.CREATED || + resp.getResult == DocWriteResponse.Result.UPDATED + } + ) +} +``` + +--- + +### executeIndexAsync + +```scala +private[client] def executeIndexAsync( + index: String, + id: String, + source: String +)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] +``` + +**Implementation Example:** + +```scala +private[client] def executeIndexAsync( + index: String, + id: String, + source: String +)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = { + val promise = Promise[ElasticResult[Boolean]]() + + val request = new IndexRequest(index) + .id(id) + .source(source, XContentType.JSON) + + client.indexAsync( + request, + RequestOptions.DEFAULT, + new ActionListener[IndexResponse] { + override def onResponse(response: IndexResponse): Unit = { + val success = response.getResult == DocWriteResponse.Result.CREATED || + response.getResult == DocWriteResponse.Result.UPDATED + promise.success(ElasticSuccess(success)) + } + + override def onFailure(e: Exception): Unit = { + promise.success(ElasticFailure(ElasticError( + message = s"Async indexing failed: ${e.getMessage}", + operation = Some("indexAsync"), + index = Some(index), + cause = Some(e) + ))) + } + } + ) + + promise.future +} +``` + +--- + +## Common Patterns + +### Repository Pattern + +```scala +trait Repository[T <: AnyRef] { + def save(entity: T, id: String)(implicit + ct: ClassTag[T], + formats: Formats, + client: ElasticClient + ): ElasticResult[Boolean] = { + client.indexAs(entity, id) + } + + def saveAsync(entity: T, id: String)(implicit + ct: ClassTag[T], + formats: Formats, + ec: ExecutionContext, + client: ElasticClient + ): Future[ElasticResult[Boolean]] = { + client.indexAsyncAs(entity, id) + } +} + +case class Product(name: String, price: Double) + +object ProductRepository extends Repository[Product] { + implicit val formats: Formats = DefaultFormats + + def saveProduct(product: Product, id: String)(implicit + client: ElasticClient + ): ElasticResult[Boolean] = { + save(product, id) + } +} +``` + +### Bulk Indexing with Individual Operations + +```scala +def indexAllIndividually[T <: AnyRef]( + entities: List[(T, String)], + indexName: String +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[List[Boolean]] = { + ElasticResult.sequence( + entities.map { case (entity, id) => + client.indexAs(entity, id, index = Some(indexName)) + } + ) +} +``` + +### Upsert Pattern + +```scala +def upsertDocument( + index: String, + id: String, + document: Map[String, Any] +): ElasticResult[Boolean] = { + val json = compact(render(decompose(document))) + client.index(index, id, json) +} +``` + +### Versioned Documents + +```scala +case class VersionedDocument[T]( + data: T, + version: Int, + updatedAt: java.time.Instant +) + +def indexVersioned[T <: AnyRef]( + entity: T, + id: String, + version: Int +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[Boolean] = { + val versioned = VersionedDocument(entity, version, java.time.Instant.now()) + client.indexAs(versioned, id) +} +``` + +--- + +## Performance Optimization + +### Disable Refresh for Bulk Operations + +```scala +def bulkIndexOptimized[T <: AnyRef]( + entities: List[(T, String)], + indexName: String +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[Unit] = { + for { + // Disable 
refresh + _ <- client.toggleRefresh(indexName, enable = false) + + // Index all documents + _ <- entities.foldLeft(ElasticResult.success(())) { case (acc, (entity, id)) => + acc.flatMap(_ => client.indexAs(entity, id, Some(indexName)).map(_ => ())) + } + + // Re-enable refresh + _ <- client.toggleRefresh(indexName, enable = true) + + // Manual refresh + _ <- client.refresh(indexName) + } yield () +} +``` + +### Parallel Async Indexing + +```scala +def parallelIndex[T <: AnyRef]( + entities: List[(T, String)], + parallelism: Int = 10 +)(implicit + ct: ClassTag[T], + ec: ExecutionContext, + formats: Formats +): Future[List[ElasticResult[Boolean]]] = { + + entities + .grouped(parallelism) + .foldLeft(Future.successful(List.empty[ElasticResult[Boolean]])) { + case (accFuture, batch) => + accFuture.flatMap { acc => + val batchFutures = batch.map { case (entity, id) => + client.indexAsyncAs(entity, id) + } + Future.sequence(batchFutures).map(acc ++ _) + } + } +} +``` + +--- + +## Error Handling + +**Invalid Index Name:** + +```scala +client.index("INVALID INDEX", "doc-1", json) match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.message.contains("Invalid index")) +} +``` + +**Serialization Failure:** + +```scala +case class InvalidEntity(data: java.io.InputStream) // Not serializable + +client.indexAs(InvalidEntity(null), "doc-1") match { + case ElasticFailure(error) => + println(s"Serialization failed: ${error.message}") +} +``` + +**Async Failure Handling:** + +```scala +client.indexAsync("products", "prod-001", invalidJson) + .recover { + case ex: Exception => + ElasticFailure(ElasticError( + message = s"Async operation failed: ${ex.getMessage}", + cause = Some(ex) + )) + } +``` + +--- + +## Best Practices + +**1. Use Type-Safe indexAs for Domain Objects** + +```scala +// ✅ Good - type-safe +case class User(name: String, email: String) +client.indexAs(user, "user-123") + +// ❌ Avoid - error-prone +val json = s"""{"name":"${user.name}","email":"${user.email}"}""" +client.index("user", "user-123", json) +``` + +**2. Handle Refresh Appropriately** + +```scala +// For single documents - automatic refresh is fine +client.indexAs(product, "prod-001") + +// For bulk operations - disable refresh +for { + _ <- client.toggleRefresh("products", enable = false) + _ <- indexMany(products) + _ <- client.toggleRefresh("products", enable = true) + _ <- client.refresh("products") +} yield () +``` + +**3. Use Async for High-Throughput Scenarios** + +```scala +// ✅ Good - non-blocking +val futures = documents.map { case (id, doc) => + client.indexAsyncAs(doc, id) +} +Future.sequence(futures) + +// ❌ Avoid - blocks thread pool +documents.foreach { case (id, doc) => + Await.result(client.indexAsyncAs(doc, id), 10.seconds) +} +``` + +**4. 
Implement Proper Error Handling** + +```scala +def safeIndex[T <: AnyRef](entity: T, id: String)(implicit + ct: ClassTag[T], + formats: Formats +): ElasticResult[Boolean] = { + client.indexAs(entity, id) match { + case success @ ElasticSuccess(true) => success + case failure @ ElasticFailure(error) => + logger.error(s"Failed to index $id: ${error.message}") + // Implement retry logic or fallback + failure + } +} +``` + +--- + +[Back to index](README.md) | [Next: Update Documents](update.md) \ No newline at end of file diff --git a/documentation/client/indices.md b/documentation/client/indices.md new file mode 100644 index 00000000..cd472f81 --- /dev/null +++ b/documentation/client/indices.md @@ -0,0 +1,839 @@ +[Back to index](README.md) + +# INDICES API + +## Overview + +The **IndicesApi** trait provides comprehensive index management functionality for Elasticsearch, including creation, deletion, lifecycle operations (open/close), reindexing, and existence checks. + +**Features:** +- Robust error handling with `ElasticResult` +- Detailed logging for debugging +- Parameter validation (index names, JSON settings) +- Automatic refresh after reindexing +- Pre-configured default settings with n-gram analysis + +**Dependencies:** +- Extends `ElasticClientHelpers` for validation and logging +- Requires `RefreshApi` for post-reindex refresh operations + +--- + +## Configuration + +### defaultSettings + +Pre-configured index settings with n-gram tokenizer and analyzer for partial matching capabilities. + +**Configuration Details:** + +```scala +val defaultSettings: String = """ +{ + "index": { + "max_ngram_diff": "20", + "mapping": { + "total_fields": { + "limit": "2000" + } + }, + "analysis": { + "analyzer": { + "ngram_analyzer": { + "tokenizer": "ngram_tokenizer", + "filter": ["lowercase", "asciifolding"] + }, + "search_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["lowercase", "asciifolding"] + } + }, + "tokenizer": { + "ngram_tokenizer": { + "type": "ngram", + "min_gram": 1, + "max_gram": 20, + "token_chars": ["letter", "digit"] + } + } + } + } +} +""" +``` + +**Features:** +- **N-gram tokenizer:** Supports partial matching (1-20 characters) +- **ASCII folding:** Normalizes accented characters +- **Field limit:** Allows up to 2000 fields per index +- **Case insensitive:** Lowercase filter applied + +--- + +## Public Methods + +### createIndex + +Creates a new index with specified settings. 
+
+**Signature:**
+
+```scala
+def createIndex(
+  index: String,
+  settings: String = defaultSettings
+): ElasticResult[Boolean]
+```
+
+**Parameters:**
+- `index` - Name of the index to create
+- `settings` - JSON settings for the index (defaults to `defaultSettings`)
+
+**Returns:**
+- `ElasticSuccess[Boolean]` with `true` if created, `false` otherwise
+- `ElasticFailure` with error details (400 for validation, other codes from ES)
+
+**Validation:**
+- Index name format validation
+- JSON settings syntax validation
+
+**Examples:**
+
+```scala
+// Create with default settings
+client.createIndex("products") match {
+  case ElasticSuccess(true) => println("Index created")
+  case ElasticSuccess(false) => println("Index already exists")
+  case ElasticFailure(e) => println(s"Error: ${e.message}")
+}
+
+// Create with custom settings
+val customSettings = """
+{
+  "index": {
+    "number_of_shards": 3,
+    "number_of_replicas": 2,
+    "refresh_interval": "30s"
+  }
+}
+"""
+client.createIndex("high-volume-index", customSettings)
+
+// Create with mappings
+val settingsWithMappings = """
+{
+  "settings": {
+    "number_of_shards": 1
+  },
+  "mappings": {
+    "properties": {
+      "title": { "type": "text" },
+      "price": { "type": "double" },
+      "created_at": { "type": "date" }
+    }
+  }
+}
+"""
+client.createIndex("catalog", settingsWithMappings)
+
+// Monadic creation with error handling
+for {
+  created <- client.createIndex("users")
+  _ <- if (created) ElasticResult.success(())
+       else ElasticResult.failure("Index not created")
+  indexed <- client.index("users", "user-123", userData)
+} yield indexed
+```
+
+---
+
+### deleteIndex
+
+Deletes an existing index.
+
+**Signature:**
+
+```scala
+def deleteIndex(index: String): ElasticResult[Boolean]
+```
+
+**Parameters:**
+- `index` - Name of the index to delete
+
+**Returns:**
+- `ElasticSuccess[Boolean]` with `true` if deleted, `false` otherwise
+- `ElasticFailure` with error details
+
+**Examples:**
+
+```scala
+// Simple deletion
+client.deleteIndex("old-index")
+
+// Safe deletion with existence check
+for {
+  exists <- client.indexExists("temp-index")
+  deleted <- if (exists) client.deleteIndex("temp-index")
+             else ElasticResult.success(false)
+} yield deleted
+
+// Cleanup multiple indices
+val oldIndices = List("logs-2023-01", "logs-2023-02", "logs-2023-03")
+oldIndices.foreach { index =>
+  client.deleteIndex(index) match {
+    case ElasticSuccess(_) => println(s"Deleted $index")
+    case ElasticFailure(e) => println(s"Failed to delete $index: ${e.message}")
+  }
+}
+```
+
+⚠️ **Warning:** Deletion is irreversible. All data in the index will be permanently lost.
+
+---
+
+### closeIndex
+
+Closes an index, blocking read/write operations while preserving data on disk.
+ +**Signature:** + +```scala +def closeIndex(index: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - Name of the index to close + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if closed, `false` otherwise +- `ElasticFailure` with error details + +**Use Cases:** +- Reduce memory/CPU usage for inactive indices +- Perform maintenance operations +- Prepare for backup or snapshot + +**Examples:** + +```scala +// Close inactive index +client.closeIndex("archive-2023") + +// Close multiple seasonal indices +val winterIndices = List("sales-dec", "sales-jan", "sales-feb") +winterIndices.foreach(client.closeIndex) + +// Close and verify +for { + closed <- client.closeIndex("old-data") + exists <- client.indexExists("old-data") +} yield (closed, exists) // (true, true) - closed but still exists +``` + +--- + +### openIndex + +Opens a previously closed index, making it available for read/write operations. + +**Signature:** + +```scala +def openIndex(index: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - Name of the index to open + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if opened, `false` otherwise +- `ElasticFailure` with error details + +**Examples:** + +```scala +// Reactivate closed index +client.openIndex("archive-2023") + +// Open and search +for { + opened <- client.openIndex("historical-data") + results <- client.search("historical-data", searchQuery) +} yield results + +// Conditional opening +def ensureIndexOpen(index: String): ElasticResult[Boolean] = { + client.indexExists(index).flatMap { + case true => client.openIndex(index) + case false => ElasticResult.failure(s"Index $index does not exist") + } +} +``` + +--- + +### reindex + +Copies documents from a source index to a target index with optional refresh. 
+ +**Signature:** + +```scala +def reindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean = true +): ElasticResult[(Boolean, Option[Long])] +``` + +**Parameters:** +- `sourceIndex` - Name of the source index +- `targetIndex` - Name of the target index (must already exist) +- `refresh` - Whether to refresh target index after reindexing (default: `true`) + +**Returns:** +- `ElasticSuccess[(Boolean, Option[Long])]` with success flag and document count +- `ElasticFailure` with error details + +**Validation:** +- Both indices must have valid names +- Source and target must be different +- Both indices must exist (404 if not found) + +**Examples:** + +```scala +// Basic reindex +client.reindex("products-v1", "products-v2") match { + case ElasticSuccess((true, Some(count))) => + println(s"Reindexed $count documents") + case ElasticSuccess((true, None)) => + println("Reindex succeeded (count unavailable)") + case ElasticSuccess((false, _)) => + println("Reindex failed") + case ElasticFailure(e) => + println(s"Error: ${e.message}") +} + +// Reindex without immediate refresh (better performance) +client.reindex("logs-old", "logs-new", refresh = false) + +// Complete migration workflow +def migrateIndex(oldIndex: String, newIndex: String): ElasticResult[Unit] = { + for { + // Create new index with updated settings + _ <- client.createIndex(newIndex, improvedSettings) + + // Copy all documents + (success, count) <- client.reindex(oldIndex, newIndex) + + // Verify count matches + _ <- if (success) ElasticResult.success(()) + else ElasticResult.failure("Reindex failed") + + // Delete old index + _ <- client.deleteIndex(oldIndex) + } yield () +} + +// Reindex with error recovery +client.reindex("source", "target") match { + case ElasticSuccess((true, Some(count))) => + println(s"✅ Successfully reindexed $count documents") + case ElasticSuccess((true, None)) => + println("⚠️ Reindex succeeded but document count unavailable") + case ElasticSuccess((false, _)) => + println("❌ Reindex operation failed") + // Attempt cleanup + client.deleteIndex("target") + case ElasticFailure(error) if error.statusCode.contains(404) => + println(s"❌ Index not found: ${error.message}") + case ElasticFailure(error) => + println(s"❌ Reindex error: ${error.message}") +} + +// Batch reindexing with progress tracking +val migrations = List( + ("users-v1", "users-v2"), + ("orders-v1", "orders-v2"), + ("products-v1", "products-v2") +) + +migrations.foreach { case (source, target) => + client.reindex(source, target) match { + case ElasticSuccess((true, Some(count))) => + println(s"✅ $source -> $target: $count docs") + case ElasticFailure(e) => + println(s"❌ $source -> $target: ${e.message}") + } +} +``` + +**Notes:** +- Target index must be created before reindexing +- Reindexing does not copy index settings or mappings +- For large indices, consider using `refresh = false` and manually refresh later +- The operation is synchronous and may take time for large datasets + +--- + +### indexExists + +Checks whether an index exists in the cluster. 
+ +**Signature:** + +```scala +def indexExists(index: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - Name of the index to check + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if exists, `false` otherwise +- `ElasticFailure` with error details + +**Examples:** + +```scala +// Simple existence check +client.indexExists("products") match { + case ElasticSuccess(true) => println("Index exists") + case ElasticSuccess(false) => println("Index does not exist") + case ElasticFailure(e) => println(s"Error: ${e.message}") +} + +// Conditional creation +def createIfNotExists(index: String, settings: String): ElasticResult[Boolean] = { + client.indexExists(index).flatMap { + case false => client.createIndex(index, settings) + case true => ElasticResult.success(false) // Already exists + } +} + +// Safe deletion +def deleteIfExists(index: String): ElasticResult[Boolean] = { + for { + exists <- client.indexExists(index) + deleted <- if (exists) client.deleteIndex(index) + else ElasticResult.success(false) + } yield deleted +} + +// Validate multiple indices +val requiredIndices = List("users", "products", "orders") +val existenceChecks = requiredIndices.map { index => + index -> client.indexExists(index) +} + +existenceChecks.foreach { + case (index, ElasticSuccess(true)) => println(s"✅ $index exists") + case (index, ElasticSuccess(false)) => println(s"❌ $index missing") + case (index, ElasticFailure(e)) => println(s"⚠️ $index check failed: ${e.message}") +} +``` + +--- + +## Implementation Requirements + +The following methods must be implemented by each client-specific trait: + +### executeCreateIndex + +```scala +private[client] def executeCreateIndex( + index: String, + settings: String +): ElasticResult[Boolean] +``` + +**REST High Level Client (ES 6-7):** + +```scala +private[client] def executeCreateIndex( + index: String, + settings: String +): ElasticResult[Boolean] = { + executeRestAction[CreateIndexResponse, Boolean]( + operation = "createIndex", + index = Some(index) + )( + action = client.indices().create( + new CreateIndexRequest(index).source(settings, XContentType.JSON), + RequestOptions.DEFAULT + ) + )( + transformer = _.isAcknowledged + ) +} +``` + +**Java Client (ES 8-9):** + +```scala +private[client] def executeCreateIndex( + index: String, + settings: String +): ElasticResult[Boolean] = { + executeJavaAction[CreateIndexResponse, Boolean]( + operation = "createIndex", + index = Some(index) + )( + action = { + val request = CreateIndexRequest.of(b => + b.index(index).withJson(new StringReader(settings)) + ) + client.indices().create(request) + } + )( + transformer = _.acknowledged() + ) +} +``` + +--- + +### executeDeleteIndex + +```scala +private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] +``` + +**REST High Level Client (ES 6-7):** + +```scala +private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = { + executeRestAction[AcknowledgedResponse, Boolean]( + operation = "deleteIndex", + index = Some(index) + )( + action = client.indices().delete( + new DeleteIndexRequest(index), + RequestOptions.DEFAULT + ) + )( + transformer = _.isAcknowledged + ) +} +``` + +**Java Client (ES 8-9):** + +```scala +private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = { + executeJavaAction[DeleteIndexResponse, Boolean]( + operation = "deleteIndex", + index = Some(index) + )( + action = client.indices().delete( + DeleteIndexRequest.of(b => b.index(index)) + ) + )( + transformer = _.acknowledged() 
+ ) +} +``` + +--- + +### executeCloseIndex + +```scala +private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] +``` + +**REST High Level Client (ES 6-7):** + +```scala +private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + executeRestAction[CloseIndexResponse, Boolean]( + operation = "closeIndex", + index = Some(index) + )( + action = client.indices().close( + new CloseIndexRequest(index), + RequestOptions.DEFAULT + ) + )( + transformer = _.isAcknowledged + ) +} +``` + +**Java Client (ES 8-9):** + +```scala +private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + executeJavaAction[CloseIndexResponse, Boolean]( + operation = "closeIndex", + index = Some(index) + )( + action = client.indices().close( + CloseIndexRequest.of(b => b.index(index)) + ) + )( + transformer = _.acknowledged() + ) +} +``` + +--- + +### executeOpenIndex + +```scala +private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] +``` + +**REST High Level Client (ES 6-7):** + +```scala +private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = { + executeRestAction[OpenIndexResponse, Boolean]( + operation = "openIndex", + index = Some(index) + )( + action = client.indices().open( + new OpenIndexRequest(index), + RequestOptions.DEFAULT + ) + )( + transformer = _.isAcknowledged + ) +} +``` + +**Java Client (ES 8-9):** + +```scala +private[client] def executeOpenIndex(index: String): ElasticResult[Boolean] = { + executeJavaAction[OpenIndexResponse, Boolean]( + operation = "openIndex", + index = Some(index) + )( + action = client.indices().open( + OpenIndexRequest.of(b => b.index(index)) + ) + )( + transformer = _.acknowledged() + ) +} +``` + +--- + +### executeReindex + +```scala +private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean +): ElasticResult[(Boolean, Option[Long])] +``` + +**REST High Level Client (ES 6-7):** + +```scala +private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean +): ElasticResult[(Boolean, Option[Long])] = { + executeRestAction[BulkByScrollResponse, (Boolean, Option[Long])]( + operation = "reindex", + index = Some(targetIndex) + )( + action = { + val request = new ReindexRequest() + .setSourceIndices(sourceIndex) + .setDestIndex(targetIndex) + .setRefresh(refresh) + client.reindex(request, RequestOptions.DEFAULT) + } + )( + transformer = resp => { + val success = resp.getBulkFailures.isEmpty + val count = Some(resp.getTotal) + (success, count) + } + ) +} +``` + +**Java Client (ES 8-9):** + +```scala +private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean +): ElasticResult[(Boolean, Option[Long])] = { + executeJavaAction[ReindexResponse, (Boolean, Option[Long])]( + operation = "reindex", + index = Some(targetIndex) + )( + action = { + val request = ReindexRequest.of(b => b + .source(s => s.index(sourceIndex)) + .dest(d => d.index(targetIndex)) + .refresh(refresh) + ) + client.reindex(request) + } + )( + transformer = resp => { + val success = resp.failures().isEmpty + val count = Some(resp.total()) + (success, count) + } + ) +} +``` + +--- + +### executeIndexExists + +```scala +private[client] def executeIndexExists(index: String): ElasticResult[Boolean] +``` + +**REST High Level Client (ES 6-7):** + +```scala +private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + executeRestAction[java.lang.Boolean, Boolean]( + operation = 
"indexExists", + index = Some(index) + )( + action = client.indices().exists( + new GetIndexRequest(index), + RequestOptions.DEFAULT + ) + )( + transformer = exists => exists.booleanValue() + ) +} +``` + +**Java Client (ES 8-9):** + +```scala +private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + executeJavaAction[BooleanResponse, Boolean]( + operation = "indexExists", + index = Some(index) + )( + action = client.indices().exists( + ExistsRequest.of(b => b.index(index)) + ) + )( + transformer = _.value() + ) +} +``` + +--- + +## Error Handling + +**Invalid Index Name (400):** + +```scala +client.createIndex("INVALID INDEX") match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.operation.contains("createIndex")) +} +``` + +**Invalid JSON Settings (400):** + +```scala +client.createIndex("test", "{ invalid json }") match { + case ElasticFailure(error) => + assert(error.message.contains("Invalid settings")) + assert(error.statusCode.contains(400)) +} +``` + +**Index Not Found (404):** + +```scala +client.reindex("missing-source", "target") match { + case ElasticFailure(error) => + assert(error.statusCode.contains(404)) + assert(error.message.contains("does not exist")) +} +``` + +**Same Source and Target:** + +```scala +client.reindex("products", "products") match { + case ElasticFailure(error) => + assert(error.message.contains("cannot be the same")) + assert(error.statusCode.contains(400)) +} +``` + +--- + +## Best Practices + +**Index Lifecycle Management:** + +```scala +// Create index with appropriate settings +val settings = if (isProduction) productionSettings else defaultSettings +client.createIndex("app-index", settings) + +// Regular maintenance +def archiveOldData(activeIndex: String, archiveIndex: String): Unit = { + for { + _ <- client.createIndex(archiveIndex) + (success, count) <- client.reindex(activeIndex, archiveIndex) + _ <- if (success) client.closeIndex(archiveIndex) + else ElasticResult.failure("Archival failed") + } yield count +} +``` + +**Safe Index Operations:** + +```scala +// Always check existence before operations +def safeCreateIndex(index: String): ElasticResult[Boolean] = { + client.indexExists(index).flatMap { + case true => + println(s"Index $index already exists") + ElasticResult.success(false) + case false => + client.createIndex(index) + } +} +``` + +**Performance Optimization:** + +```scala +// For large reindex operations, disable refresh +client.reindex("large-source", "large-target", refresh = false) + .flatMap { case (success, count) => + if (success) { + // Manual refresh after completion + Thread.sleep(5000) // Allow time for indexing + client.refresh("large-target") + } else { + ElasticResult.failure("Reindex failed") + } + } +``` + +--- + +[Back to index](README.md) | [Next: Settings Management](settings.md) \ No newline at end of file diff --git a/documentation/client/mappings.md b/documentation/client/mappings.md new file mode 100644 index 00000000..2dce9e2c --- /dev/null +++ b/documentation/client/mappings.md @@ -0,0 +1,836 @@ +[Back to index](README.md) + +# MAPPING API + +## Overview + +The **MappingApi** trait provides comprehensive mapping management functionality for Elasticsearch indices, including creation, retrieval, comparison, and safe migration with automatic rollback capabilities. 
+ +**Features:** +- Set and retrieve index mappings +- Compare mappings to detect changes +- Automatic mapping migration with rollback on failure +- Zero-downtime mapping updates +- Comprehensive validation and error handling +- Backup and restore mechanisms + +**Dependencies:** +- Requires `SettingsApi` for settings management +- Requires `IndicesApi` for index operations +- Requires `RefreshApi` for post-migration refresh + +--- + +## Public Methods + +### setMapping + +Sets or updates the mapping for an index. + +**Signature:** + +```scala +def setMapping(index: String, mapping: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - The index name to set the mapping for +- `mapping` - JSON string containing the mapping definition + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if mapping set successfully +- `ElasticFailure` with error details (400 for validation errors) + +**Validation:** +- Index name format validation +- JSON syntax validation + +**Limitations:** +- In Elasticsearch, most mapping changes are additive only +- Cannot change existing field types (requires reindexing) +- Cannot delete fields (they remain in the mapping but can be ignored) + +**Examples:** + +```scala +// Basic mapping +val mapping = """ +{ + "properties": { + "title": { + "type": "text", + "analyzer": "standard" + }, + "price": { + "type": "double" + }, + "created_at": { + "type": "date" + } + } +} +""" +client.setMapping("products", mapping) + +// Add new fields to existing mapping +val additionalFields = """ +{ + "properties": { + "description": { + "type": "text" + }, + "tags": { + "type": "keyword" + } + } +} +""" +client.setMapping("products", additionalFields) + +// Complex mapping with nested objects +val complexMapping = """ +{ + "properties": { + "user": { + "type": "object", + "properties": { + "name": { "type": "text" }, + "email": { "type": "keyword" }, + "age": { "type": "integer" } + } + }, + "address": { + "type": "nested", + "properties": { + "street": { "type": "text" }, + "city": { "type": "keyword" }, + "zipcode": { "type": "keyword" } + } + } + } +} +""" +client.setMapping("users", complexMapping) + +// Mapping with custom analyzers +val searchMapping = """ +{ + "properties": { + "title": { + "type": "text", + "analyzer": "ngram_analyzer", + "search_analyzer": "search_analyzer" + }, + "category": { + "type": "keyword" + } + } +} +""" +client.setMapping("search-index", searchMapping) + +// Error handling +client.setMapping("my-index", "{ invalid json }") match { + case ElasticSuccess(true) => println("Mapping set") + case ElasticSuccess(false) => println("Mapping not set") + case ElasticFailure(error) => println(s"Error: ${error.message}") +} +``` + +--- + +### getMapping + +Retrieves the current mapping of an index as a JSON string. 
+ +**Signature:** + +```scala +def getMapping(index: String): ElasticResult[String] +``` + +**Parameters:** +- `index` - The index name to retrieve the mapping from + +**Returns:** +- `ElasticSuccess[String]` containing the mapping as JSON +- `ElasticFailure` with error details (400 for validation, 404 if index not found) + +**Examples:** + +```scala +// Retrieve mapping +client.getMapping("products") match { + case ElasticSuccess(json) => + println(s"Mapping: $json") + case ElasticFailure(e) => + println(s"Error: ${e.message}") +} + +// Parse and inspect mapping +client.getMapping("my-index").map { json => + val mapping = parse(json) + val properties = (mapping \ "properties").extract[Map[String, Any]] + properties.keys.foreach(field => println(s"Field: $field")) +} + +// Compare mappings across indices +def compareMappings(index1: String, index2: String): ElasticResult[Boolean] = { + for { + mapping1 <- client.getMapping(index1) + mapping2 <- client.getMapping(index2) + } yield mapping1 == mapping2 +} + +// Backup mapping before changes +def backupMapping(index: String): ElasticResult[Unit] = { + client.getMapping(index).map { json => + saveToFile(s"$index-mapping-backup.json", json) + } +} + +// Extract specific field mapping +def getFieldMapping(index: String, field: String): ElasticResult[String] = { + client.getMapping(index).flatMap { json => + ElasticResult.attempt { + val mapping = parse(json) + val fieldMapping = (mapping \ "properties" \ field) + compact(render(fieldMapping)) + } + } +} +``` + +--- + +### getMappingProperties + +Retrieves the mapping properties of an index (alias for `getMapping`). + +**Signature:** + +```scala +def getMappingProperties(index: String): ElasticResult[String] +``` + +**Parameters:** +- `index` - The index name + +**Returns:** +- `ElasticSuccess[String]` containing the mapping properties as JSON +- `ElasticFailure` with error details + +**Note:** This method is functionally identical to `getMapping` and exists for semantic clarity. + +--- + +### shouldUpdateMapping + +Determines if an index's mapping differs from a provided mapping definition. 
+ +**Signature:** + +```scala +def shouldUpdateMapping( + index: String, + mapping: String +): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - The index name to check +- `mapping` - The target mapping to compare against + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if mappings differ, `false` if identical +- `ElasticFailure` with error details + +**Behavior:** +- Uses `MappingComparator.isMappingDifferent` for comparison +- Compares structural differences in field definitions + +**Examples:** + +```scala +// Check if update needed +val newMapping = """ +{ + "properties": { + "title": { "type": "text" }, + "price": { "type": "double" }, + "new_field": { "type": "keyword" } + } +} +""" + +client.shouldUpdateMapping("products", newMapping) match { + case ElasticSuccess(true) => + println("Mapping needs update") + case ElasticSuccess(false) => + println("Mapping is current") + case ElasticFailure(e) => + println(s"Error: ${e.message}") +} + +// Conditional update +def updateIfNeeded(index: String, mapping: String): ElasticResult[Boolean] = { + for { + needsUpdate <- client.shouldUpdateMapping(index, mapping) + result <- if (needsUpdate) { + client.updateMapping(index, mapping) + } else { + ElasticResult.success(false) + } + } yield result +} + +// Audit mapping status +def auditMappings( + indices: List[String], + expectedMapping: String +): Map[String, Boolean] = { + indices.flatMap { index => + client.shouldUpdateMapping(index, expectedMapping) match { + case ElasticSuccess(needsUpdate) => Some(index -> needsUpdate) + case ElasticFailure(_) => None + } + }.toMap +} +``` + +--- + +### updateMapping + +Intelligently updates an index's mapping, handling three scenarios automatically: +1. Index doesn't exist → Create with mapping +2. Mapping is outdated → Migrate with rollback protection +3. Mapping is current → Do nothing + +**Signature:** + +```scala +def updateMapping( + index: String, + mapping: String, + settings: String = defaultSettings +): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - The index name to update +- `mapping` - The new mapping definition +- `settings` - Index settings (defaults to `defaultSettings`) + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if mapping created/updated successfully +- `ElasticFailure` with error details + +**Migration Process:** +When mapping update requires reindexing: +1. Backup original mapping and settings +2. Create temporary index with new mapping +3. Reindex data to temporary index +4. Delete original index +5. Recreate original index with new mapping +6. Reindex data back from temporary +7. Delete temporary index +8. 
**On failure:** Automatic rollback to original state + +**Examples:** + +```scala +// Simple update +val mapping = """ +{ + "properties": { + "title": { "type": "text" }, + "price": { "type": "double" } + } +} +""" +client.updateMapping("products", mapping) + +// Update with custom settings +val customSettings = """ +{ + "index": { + "number_of_shards": 3, + "number_of_replicas": 2 + } +} +""" +client.updateMapping("products", mapping, customSettings) + +// Safe production update +def safeProductionUpdate( + index: String, + newMapping: String +): ElasticResult[Boolean] = { + for { + // Backup current state + currentMapping <- client.getMapping(index) + _ = saveBackup(index, currentMapping) + + // Check if update needed + needsUpdate <- client.shouldUpdateMapping(index, newMapping) + + // Perform update if needed + result <- if (needsUpdate) { + println(s"Updating mapping for $index...") + client.updateMapping(index, newMapping) + } else { + println(s"Mapping for $index is already current") + ElasticResult.success(true) + } + } yield result +} + +// Batch update multiple indices +def updateAllIndices( + indices: List[String], + mapping: String +): List[(String, ElasticResult[Boolean])] = { + indices.map { index => + index -> client.updateMapping(index, mapping) + } +} + +// Update with verification +def updateAndVerify( + index: String, + mapping: String +): ElasticResult[Boolean] = { + for { + updated <- client.updateMapping(index, mapping) + _ <- if (updated) { + client.shouldUpdateMapping(index, mapping).flatMap { + case false => ElasticResult.success(()) + case true => ElasticResult.failure("Mapping verification failed") + } + } else { + ElasticResult.success(()) + } + } yield updated +} + +// Scheduled mapping updates +def scheduledMappingUpdate( + index: String, + mapping: String +): ElasticResult[Boolean] = { + // Disable refresh for better performance + for { + _ <- client.toggleRefresh(index, enable = false) + updated <- client.updateMapping(index, mapping) + _ <- client.toggleRefresh(index, enable = true) + _ <- client.refresh(index) + } yield updated +} +``` + +**Rollback Protection:** + +```scala +// Automatic rollback on failure +client.updateMapping("critical-index", newMapping) match { + case ElasticSuccess(true) => + println("✅ Migration successful") + case ElasticSuccess(false) => + println("⚠️ No update needed") + case ElasticFailure(error) => + println(s"❌ Migration failed: ${error.message}") + println("✅ Automatic rollback completed") + // Original index restored with original mapping +} +``` + +--- + +## Private Helper Methods + +### createIndexWithMapping + +Creates a new index with the specified mapping and settings. + +**Process:** +1. Create index with settings +2. Set mapping on the index + +```scala +private def createIndexWithMapping( + index: String, + mapping: String, + settings: String +): ElasticResult[Boolean] +``` + +--- + +### migrateMappingWithRollback + +Performs mapping migration with automatic rollback on failure. + +**Process:** +1. Backup original mapping and settings +2. Perform migration +3. On failure: Rollback to original state + +```scala +private def migrateMappingWithRollback( + index: String, + newMapping: String, + settings: String +): ElasticResult[Boolean] +``` + +--- + +### performMigration + +Executes the actual migration process using a temporary index. + +**Process:** +1. Create temporary index with new mapping +2. Reindex data from original to temporary +3. Delete original index +4. Recreate original with new mapping +5. 
Reindex data back from temporary +6. Delete temporary index + +```scala +private def performMigration( + index: String, + tempIndex: String, + mapping: String, + settings: String +): ElasticResult[Boolean] +``` + +**Temporary Index Naming:** +- Format: `{index}_tmp_{uuid}` +- Example: `products_tmp_a1b2c3d4` + +--- + +### rollbackMigration + +Restores index to original state after failed migration. + +**Process:** +1. Check if temporary index exists +2. Delete current (potentially corrupted) index +3. Recreate with original settings and mapping +4. Reindex from temporary if it exists +5. Cleanup temporary index + +```scala +private def rollbackMigration( + index: String, + tempIndex: String, + originalMapping: String, + originalSettings: String +): ElasticResult[Boolean] +``` + +--- + +## Implementation Requirements + +### executeSetMapping + +```scala +private[client] def executeSetMapping( + index: String, + mapping: String +): ElasticResult[Boolean] +``` + +--- + +### executeGetMapping + +```scala +private[client] def executeGetMapping(index: String): ElasticResult[String] +``` + +--- + +## Common Mapping Patterns + +### Basic Field Types + +```scala +val basicMapping = """ +{ + "properties": { + "text_field": { "type": "text" }, + "keyword_field": { "type": "keyword" }, + "integer_field": { "type": "integer" }, + "long_field": { "type": "long" }, + "double_field": { "type": "double" }, + "boolean_field": { "type": "boolean" }, + "date_field": { "type": "date" }, + "geo_point_field": { "type": "geo_point" } + } +} +""" +``` + +### Text Analysis + +```scala +val textAnalysisMapping = """ +{ + "properties": { + "title": { + "type": "text", + "analyzer": "standard", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + }, + "ngram": { + "type": "text", + "analyzer": "ngram_analyzer" + } + } + } + } +} +""" +``` + +### Nested Objects + +```scala +val nestedMapping = """ +{ + "properties": { + "user": { + "type": "nested", + "properties": { + "name": { "type": "text" }, + "email": { "type": "keyword" } + } + } + } +} +""" +``` + +### Dynamic Mapping Control + +```scala +val strictMapping = """ +{ + "dynamic": "strict", + "properties": { + "allowed_field": { "type": "text" } + } +} +""" +``` + +--- + +## Migration Workflows + +### Zero-Downtime Mapping Update + +```scala +def zeroDowntimeMappingUpdate( + index: String, + newMapping: String +): ElasticResult[Unit] = { + for { + // Check if migration needed + needsUpdate <- client.shouldUpdateMapping(index, newMapping) + + // Perform update with automatic rollback + _ <- if (needsUpdate) { + client.updateMapping(index, newMapping).map { success => + if (success) { + println(s"✅ Mapping updated for $index") + } else { + println(s"⚠️ Mapping update failed for $index") + } + } + } else { + println(s"✅ Mapping already current for $index") + ElasticResult.success(()) + } + } yield () +} +``` + +### Version-Based Migration + +```scala +def versionedMappingUpdate( + baseIndex: String, + version: Int, + mapping: String +): ElasticResult[String] = { + val newIndex = s"$baseIndex-v$version" + val alias = baseIndex + + for { + // Create new versioned index + _ <- client.createIndex(newIndex) + _ <- client.setMapping(newIndex, mapping) + + // Find current version + currentIndices <- findIndicesWithAlias(alias) + + // Reindex if previous version exists + _ <- currentIndices.headOption match { + case Some(oldIndex) => + client.reindex(oldIndex, newIndex) + case None => + ElasticResult.success((true, None)) + } + + // Swap alias + _ <- 
currentIndices.headOption match { + case Some(oldIndex) => + client.swapAlias(oldIndex, newIndex, alias) + case None => + client.addAlias(newIndex, alias) + } + } yield newIndex +} +``` + +### Incremental Mapping Evolution + +```scala +def evolveMappingIncrementally( + index: String, + changes: List[String] +): ElasticResult[Boolean] = { + changes.foldLeft(ElasticResult.success(true)) { (acc, change) => + acc.flatMap { _ => + client.setMapping(index, change) + } + } +} + +// Example usage +val changes = List( + """{"properties": {"new_field_1": {"type": "text"}}}""", + """{"properties": {"new_field_2": {"type": "keyword"}}}""", + """{"properties": {"new_field_3": {"type": "date"}}}""" +) + +evolveMappingIncrementally("my-index", changes) +``` + +--- + +## Error Handling + +**Invalid Mapping JSON:** + +```scala +client.setMapping("my-index", "{ invalid }") match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.message.contains("Invalid mapping")) +} +``` + +**Index Not Found:** + +```scala +client.getMapping("non-existent") match { + case ElasticFailure(error) => + assert(error.statusCode.contains(404)) +} +``` + +**Migration Failure with Rollback:** + +```scala +client.updateMapping("my-index", incompatibleMapping) match { + case ElasticFailure(error) => + println(s"Migration failed: ${error.message}") + println("Original mapping and data restored") + // Index is back to original state +} +``` + +--- + +## Best Practices + +**1. Always Backup Before Migration** + +```scala +def safeMigration(index: String, mapping: String): ElasticResult[Boolean] = { + for { + backup <- client.getMapping(index) + _ = saveToFile(s"$index-backup.json", backup) + updated <- client.updateMapping(index, mapping) + } yield updated +} +``` + +**2. Use Multi-Field Mappings for Flexibility** + +```scala +val flexibleMapping = """ +{ + "properties": { + "title": { + "type": "text", + "fields": { + "keyword": { "type": "keyword" }, + "ngram": { "type": "text", "analyzer": "ngram_analyzer" } + } + } + } +} +""" +``` + +**3. Plan for Schema Evolution** + +```scala +// Use dynamic templates for future fields +val evolutionMapping = """ +{ + "dynamic_templates": [ + { + "strings_as_keywords": { + "match_mapping_type": "string", + "mapping": { + "type": "keyword" + } + } + } + ], + "properties": { + "known_field": { "type": "text" } + } +} +""" +``` + +**4. Test Mappings in Development** + +```scala +def testMapping(mapping: String): ElasticResult[Boolean] = { + val testIndex = s"test-${UUID.randomUUID().toString}" + for { + _ <- client.createIndex(testIndex) + _ <- client.setMapping(testIndex, mapping) + _ <- client.deleteIndex(testIndex) + } yield true +} +``` + +--- + +[Back to index](README.md) | [Next: Index Documents](index.md) \ No newline at end of file diff --git a/documentation/client/refresh.md b/documentation/client/refresh.md new file mode 100644 index 00000000..a167c426 --- /dev/null +++ b/documentation/client/refresh.md @@ -0,0 +1,190 @@ +[Back to index](README.md) + +# REFRESH API + +## Overview + +The **RefreshApi** trait provides functionality to refresh Elasticsearch indices, making all recently indexed documents immediately searchable. This is useful for testing, real-time search requirements, or after bulk indexing operations. + +**Dependencies:** Extends `ElasticClientHelpers` for validation and logging utilities. + +--- + +## Public Methods + +### refresh + +Refreshes an index to ensure all documents are indexed and immediately searchable. 
+ +**Signature:** + +```scala +def refresh(index: String): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - The name of the index to refresh + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if refresh succeeded, `false` otherwise +- `ElasticFailure` with error details if operation fails + +**Behavior:** +- Validates index name before execution (returns 400 error if invalid) +- Logs debug message before refresh attempt +- Logs success with ✅ or failure with ❌ +- Enriches validation errors with operation context (index name, status code 400, operation "refresh") + +**Examples:** + +```scala +// Basic refresh +val result = client.refresh("my-index") +result match { + case ElasticSuccess(true) => println("Index refreshed") + case ElasticSuccess(false) => println("Refresh not performed") + case ElasticFailure(e) => println(s"Error: ${e.message}") +} + +// Monadic chaining +for { + _ <- client.index("users", user) + refreshed <- client.refresh("users") + result <- client.search(query) +} yield result + +// Multiple indices refresh +val indices = List("index1", "index2", "index3") +val results = indices.map(client.refresh) +results.foreach { + case ElasticSuccess(_) => println("OK") + case ElasticFailure(e) => println(s"Failed: ${e.message}") +} + +// Conditional refresh for testing +def refreshIfTest(index: String): ElasticResult[Boolean] = { + if (sys.env.get("ENV").contains("test")) { + client.refresh(index) + } else { + ElasticResult.success(false) // Skip in production + } +} +``` + +**Common Use Cases:** + +- **Testing:** Ensure documents are searchable immediately after indexing +- **Bulk Operations:** Refresh after large batch imports +- **Real-time Search:** Force visibility of recent changes +- **Data Validation:** Verify indexing before downstream operations + +**Performance Considerations:** + +⚠️ Refreshing is expensive and should be used sparingly in production. Elasticsearch automatically refreshes indices every second by default. + +```scala +// ❌ Bad - refresh after each document +documents.foreach { doc => + client.index("products", id, doc) + client.refresh("products") // Too frequent! +} + +// ✅ Good - refresh once after bulk operation +client.bulk(documents) +client.refresh("products") +``` + +--- + +## Implementation Requirements + +### executeRefresh + +Must be implemented by each client-specific trait. 
+
+**Signature:**
+
+```scala
+private[client] def executeRefresh(index: String): ElasticResult[Boolean]
+```
+
+**Implementation Examples:**
+
+**REST High Level Client (ES 6-7):**
+
+```scala
+private[client] def executeRefresh(index: String): ElasticResult[Boolean] = {
+  executeRestAction[RefreshResponse, Boolean](
+    operation = "refresh",
+    index = Some(index)
+  )(
+    action = client.indices().refresh(
+      new RefreshRequest(index),
+      RequestOptions.DEFAULT
+    )
+  )(
+    transformer = resp => resp.getStatus == RestStatus.OK
+  )
+}
+```
+
+**Java Client (ES 8-9):**
+
+```scala
+private[client] def executeRefresh(index: String): ElasticResult[Boolean] = {
+  executeJavaAction[RefreshResponse, Boolean](
+    operation = "refresh",
+    index = Some(index)
+  )(
+    action = client.indices().refresh(
+      new RefreshRequest.Builder().index(index).build()
+    )
+  )(
+    // The refresh succeeded only if no shard reported a failure
+    transformer = resp => resp.shards().failures().isEmpty
+  )
+}
+```
+
+**Jest Client (ES 5-6):**
+
+```scala
+private[client] def executeRefresh(index: String): ElasticResult[Boolean] = {
+  executeJestAction[JestResult, Boolean](
+    operation = "refresh",
+    index = Some(index)
+  )(
+    action = new Refresh.Builder().addIndex(index).build()
+  )(
+    transformer = _.isSucceeded
+  )
+}
+```
+
+---
+
+## Error Handling
+
+**Invalid Index Name:**
+
+```scala
+client.refresh("") match {
+  case ElasticFailure(error) =>
+    assert(error.statusCode.contains(400))
+    assert(error.operation.contains("refresh"))
+    assert(error.index.isDefined)
+}
+```
+
+**Index Not Found:**
+
+```scala
+client.refresh("non-existent-index") match {
+  case ElasticFailure(error) =>
+    // Typically 404 error from Elasticsearch
+    println(s"Index not found: ${error.message}")
+}
+```
+
+---
+
+[Back to index](README.md) | [Next: Indices Management](indices.md)
\ No newline at end of file
diff --git a/documentation/client/scroll.md b/documentation/client/scroll.md
new file mode 100644
index 00000000..61c5b35e
--- /dev/null
+++ b/documentation/client/scroll.md
@@ -0,0 +1,1740 @@
+[Back to index](README.md)
+
+# SCROLL API
+
+## Overview
+
+The **Scroll API** provides efficient streaming access to large result sets from Elasticsearch using **Akka Streams**. It automatically selects the optimal scrolling strategy based on your Elasticsearch version and whether aggregations are present.
+
+**Key Features:**
+- **Automatic strategy selection** (PIT + search_after, search_after, or classic scroll)
+- **Akka Streams integration** for reactive data processing
+- **Type-safe result conversion** with automatic deserialization
+- **Built-in metrics tracking** (throughput, batches, duration)
+- **Automatic error handling** with retry logic
+- **Memory-efficient streaming** for large datasets
+- **Configurable batch sizes** and limits
+
+**Dependencies:**
+- Requires `SearchApi` for query execution
+- Requires Akka Streams for reactive streaming
+- Requires `ElasticConversion` for result parsing
+
+---
+
+## Table of Contents
+
+1. [Core Concepts](#core-concepts)
+2. [Scroll Strategies](#scroll-strategies)
+3. [Configuration](#configuration)
+4. [Basic Usage](#basic-usage)
+5. [Typed Scrolling](#typed-scrolling)
+6. [Metrics and Monitoring](#metrics-and-monitoring)
+7. [Error Handling](#error-handling)
+8. [Performance Tuning](#performance-tuning)
+9. [Advanced Patterns](#advanced-patterns)
+10. [Testing](#testing)
+11. 
[Best Practices](#best-practices) + +--- + +## Core Concepts + +### Scrolling Strategies + +The API automatically selects the best strategy based on your query and the Elasticsearch version: + +**Strategy Selection Matrix:** + +| ES Version | Aggregations | Strategy | +|-----------------|---------------|----------------------------------| +| 7.10+ | No | PIT + search_after (recommended) | +| 7.10+ | Yes | Classic scroll | +| < 7.10 | No | search_after | +| < 7.10 | Yes | Classic scroll | + +--- + +### Scroll Strategy Types + +```scala +sealed trait ScrollStrategy + +// Point In Time + search_after (ES 7.10+, best performance) +case object UsePIT extends ScrollStrategy + +// search_after only (efficient, no server state) +case object UseSearchAfter extends ScrollStrategy + +// Classic scroll (supports aggregations) +case object UseScroll extends ScrollStrategy +``` + +**Strategy Comparison:** + +| Strategy | Server State | Aggregations | Deep Pagination | Timeout Issues | Performance | +|------------------------|---------------|---------------|------------------|-----------------|--------------| +| **PIT + search_after** | Minimal | ❌ No | ✅ Excellent | ❌ None | ⭐⭐⭐⭐⭐ | +| **search_after** | None | ❌ No | ✅ Good | ❌ None | ⭐⭐⭐⭐ | +| **Classic scroll** | Yes | ✅ Yes | ⚠️ Limited | ⚠️ Possible | ⭐⭐⭐ | + +--- + +### Source Types + +```scala +// Basic scroll source (returns Map with metrics) +def scroll( + sql: SQLQuery, + config: ScrollConfig = ScrollConfig() +)(implicit system: ActorSystem): Source[(Map[String, Any], ScrollMetrics), NotUsed] + +// Typed scroll source (automatic deserialization) +def scrollAs[T]( + sql: SQLQuery, + config: ScrollConfig = ScrollConfig() +)(implicit + system: ActorSystem, + m: Manifest[T], + formats: Formats +): Source[(T, ScrollMetrics), NotUsed] +``` + +--- + +## Scroll Strategies + +### Point In Time (PIT) + search_after + +**Best for:** ES 7.10+, large result sets, no aggregations + +**Advantages:** +- ✅ Consistent snapshot across pagination +- ✅ No scroll timeout issues +- ✅ Better resource usage +- ✅ Automatic cleanup +- ✅ Suitable for deep pagination + +**Limitations:** +- ❌ Not supported with aggregations +- ❌ Requires ES 7.10+ + +```scala +// Automatically used for ES 7.10+ without aggregations +val query = SQLQuery( + query = """ + SELECT id, name, price + FROM products + WHERE category = 'electronics' + ORDER BY price DESC + """ +) + +// Will use PIT + search_after strategy +client.scroll(query).runWith(Sink.seq) +``` + +--- + +### search_after + +**Best for:** ES < 7.10, large result sets, no aggregations + +**Advantages:** +- ✅ No server-side state +- ✅ Efficient pagination +- ✅ No timeout issues +- ✅ Good for deep pagination + +**Limitations:** +- ❌ Not supported with aggregations +- ❌ Requires sort fields +- ⚠️ No consistent snapshot (data can change between pages) + +```scala +// Automatically used for ES < 7.10 without aggregations +val query = SQLQuery( + query = """ + SELECT id, name, price + FROM products + WHERE category = 'electronics' + ORDER BY created_at DESC, id ASC + """ +) + +// Will use search_after strategy +client.scroll(query).runWith(Sink.seq) +``` + +--- + +### Classic Scroll + +**Best for:** Queries with aggregations, consistent snapshots required + +**Advantages:** +- ✅ Supports aggregations +- ✅ Consistent snapshot +- ✅ Works on all ES versions +- ✅ Automatic cleanup + +**Limitations:** +- ⚠️ Server-side state (scroll context) +- ⚠️ Subject to scroll timeout +- ⚠️ Higher resource usage +- ⚠️ Limited deep pagination + 
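+Because a classic scroll keeps server-side context that can expire mid-stream, slow consumers may want a wider `keepAlive` window. A small sketch using the `ScrollConfig` described in the Configuration section below (field names as defined there):
+
+```scala
+// Smaller batches plus a longer scroll-context keep-alive for slow downstream processing
+val slowConsumerConfig = ScrollConfig(
+  scrollSize = 500,
+  keepAlive = "5m" // only relevant when the classic scroll strategy is selected
+)
+
+client.scroll(query, slowConsumerConfig).runWith(Sink.seq)
+```
+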
+```scala +// Automatically used when aggregations are present +val query = SQLQuery( + query = """ + SELECT + category, + COUNT(*) as total, + AVG(price) as avg_price + FROM products + GROUP BY category + """ +) + +// Will use classic scroll strategy +client.scroll(query).runWith(Sink.seq) +``` + +--- + +## Configuration + +### ScrollConfig + +```scala +case class ScrollConfig( + // Batch size (documents per request) + scrollSize: Int = 1000, + + // Keep-alive time for scroll context + keepAlive: String = "1m", + + // Maximum documents to retrieve (None = unlimited) + maxDocuments: Option[Long] = None, + + // Prefer search_after over classic scroll + preferSearchAfter: Boolean = true, + + // Log progress every N batches + logEvery: Int = 10, + + // Initial metrics + metrics: ScrollMetrics = ScrollMetrics(), + + // Retry configuration + retryConfig: RetryConfig = RetryConfig() +) +``` + +**Configuration Options:** + +| Parameter | Type | Default | Description | +|---------------------|-----------------|-------------------|----------------------------------------------| +| `scrollSize` | `Int` | `1000` | Number of documents per batch | +| `keepAlive` | `String` | `"1m"` | Scroll context timeout (classic scroll only) | +| `maxDocuments` | `Option[Long]` | `None` | Maximum documents to retrieve | +| `preferSearchAfter` | `Boolean` | `true` | Prefer search_after when available | +| `logEvery` | `Int` | `10` | Log progress every N batches | +| `metrics` | `ScrollMetrics` | `ScrollMetrics()` | Initial metrics state | + +--- + +### ScrollMetrics + +```scala +case class ScrollMetrics( + totalDocuments: Long = 0, + totalBatches: Int = 0, + startTime: Long = System.currentTimeMillis(), + endTime: Option[Long] = None +) { + // Calculate duration in milliseconds + def duration: Long = endTime.getOrElse(System.currentTimeMillis()) - startTime + + // Calculate throughput (documents per second) + def documentsPerSecond: Double = { + val durationSec = duration / 1000.0 + if (durationSec > 0) totalDocuments / durationSec else 0.0 + } + + // Mark as complete + def complete: ScrollMetrics = copy(endTime = Some(System.currentTimeMillis())) +} +``` + +**Metrics Fields:** + +| Field | Type | Description | +|----------------------|----------------|--------------------------------| +| `totalDocuments` | `Long` | Total documents retrieved | +| `totalBatches` | `Int` | Total batches processed | +| `startTime` | `Long` | Start timestamp (milliseconds) | +| `endTime` | `Option[Long]` | End timestamp (milliseconds) | +| `duration` | `Long` | Total duration (milliseconds) | +| `documentsPerSecond` | `Double` | Throughput rate | + +--- + +## Basic Usage + +### Simple Scrolling + +```scala +import akka.actor.ActorSystem +import akka.stream.scaladsl.{Sink, Source} +import scala.concurrent.ExecutionContext.Implicits.global + +implicit val system: ActorSystem = ActorSystem("scroll-example") + +// Simple SQL query +val query = SQLQuery( + query = """ + SELECT id, name, price, category + FROM products + WHERE price > 100 + ORDER BY price DESC + """ +) + +// Scroll through results +client.scroll(query).runWith(Sink.foreach { case (doc, metrics) => + println(s"Document: $doc") + println(s"Progress: ${metrics.totalDocuments} docs, ${metrics.documentsPerSecond} docs/sec") +}) +``` + +--- + +### Collecting All Results + +```scala +// Collect all documents into a sequence +val allDocs: Future[Seq[(Map[String, Any], ScrollMetrics)]] = + client.scroll(query).runWith(Sink.seq) + +allDocs.foreach { results => + println(s"Retrieved 
${results.size} documents")
+
+  // Get final metrics
+  val finalMetrics = results.lastOption.map(_._2)
+  finalMetrics.foreach { m =>
+    println(s"Total time: ${m.duration}ms")
+    println(s"Throughput: ${m.documentsPerSecond} docs/sec")
+  }
+
+  // Process documents
+  results.foreach { case (doc, _) =>
+    println(s"ID: ${doc.get("id")}, Name: ${doc.get("name")}")
+  }
+}
+```
+
+---
+
+### Limited Scrolling
+
+```scala
+// Limit to first 5000 documents
+val config = ScrollConfig(
+  scrollSize = 500,
+  maxDocuments = Some(5000)
+)
+
+client.scroll(query, config).runWith(Sink.foreach { case (doc, metrics) =>
+  println(s"Document ${metrics.totalDocuments}: ${doc.get("name")}")
+})
+```
+
+---
+
+### Custom Batch Size
+
+```scala
+// Process in batches of 2000
+val config = ScrollConfig(scrollSize = 2000)
+
+client.scroll(query, config)
+  .grouped(2000)
+  .runWith(Sink.foreach { batch =>
+    println(s"Processing batch of ${batch.size} documents")
+    // Batch processing logic
+    processBatch(batch.map(_._1))
+  })
+```
+
+---
+
+## Typed Scrolling
+
+### Basic Typed Scrolling
+
+```scala
+case class Product(
+  id: String,
+  name: String,
+  price: Double,
+  category: String,
+  stock: Int
+)
+
+implicit val formats: Formats = DefaultFormats
+
+val query = SQLQuery(
+  query = """
+    SELECT id, name, price, category, stock
+    FROM products
+    WHERE category = 'electronics'
+  """
+)
+
+// Scroll with automatic type conversion
+client.scrollAs[Product](query).runWith(Sink.foreach { case (product, metrics) =>
+  println(s"Product: ${product.name} - $$${product.price}")
+  println(s"Progress: ${metrics.totalDocuments} products")
+})
+```
+
+---
+
+### Collecting Typed Results
+
+```scala
+// Collect all products
+val allProducts: Future[Seq[Product]] =
+  client.scrollAs[Product](query)
+    .map(_._1) // Extract product, discard metrics
+    .runWith(Sink.seq)
+
+allProducts.foreach { products =>
+  println(s"Retrieved ${products.size} products")
+
+  val totalValue = products.map(_.price).sum
+  println(f"Total inventory value: $$${totalValue}%,.2f")
+}
+```
+
+---
+
+### Filtering Typed Results
+
+```scala
+// Filter expensive products during streaming
+client.scrollAs[Product](query)
+  .filter { case (product, _) => product.price > 500 }
+  .map(_._1) // Extract product
+  .runWith(Sink.seq)
+  .foreach { expensiveProducts =>
+    println(s"Found ${expensiveProducts.size} expensive products")
+    expensiveProducts.foreach { p =>
+      println(s"  ${p.name}: $$${p.price}")
+    }
+  }
+```
+
+---
+
+### Transforming Typed Results
+
+```scala
+case class ProductSummary(name: String, value: Double)
+
+client.scrollAs[Product](query)
+  .map { case (product, _) =>
+    ProductSummary(
+      name = product.name,
+      value = product.price * product.stock
+    )
+  }
+  .runWith(Sink.seq)
+  .foreach { summaries =>
+    val totalValue = summaries.map(_.value).sum
+    println(f"Total inventory value: $$${totalValue}%,.2f")
+  }
+```
+
+---
+
+## Metrics and Monitoring
+
+### Tracking Progress
+
+```scala
+val config = ScrollConfig(
+  scrollSize = 1000,
+  logEvery = 5 // Log every 5 batches
+)
+
+client.scroll(query, config).runWith(Sink.foreach { case (doc, metrics) =>
+  // Metrics are automatically updated
+  if (metrics.totalBatches % 5 == 0) {
+    println(s"Progress Report:")
+    println(s"  Documents: ${metrics.totalDocuments}")
+    println(s"  Batches: ${metrics.totalBatches}")
+    println(s"  Duration: ${metrics.duration}ms")
+    println(s"  Throughput: ${metrics.documentsPerSecond} docs/sec")
+  }
+})
+
+// Output:
+// Progress Report:
+//   Documents: 5000
+//   Batches: 5
+//   Duration: 2345ms
+//   Throughput: 2132.2 docs/sec
+```
+
+---
+
+### Final Metrics
+
+```scala
+client.scroll(query)
+  .runWith(Sink.last)
+  .foreach { case (_, finalMetrics) =>
+    val completed = finalMetrics.complete
+
+    println("Scroll Completed!")
+    println(s"  Total Documents: ${completed.totalDocuments}")
+    println(s"  Total Batches: ${completed.totalBatches}")
+    println(s"  Total Duration: ${completed.duration}ms")
+    println(s"  Average Throughput: ${completed.documentsPerSecond} docs/sec")
+  }
+```
+
+---
+
+### Custom Metrics Tracking
+
+```scala
+case class CustomMetrics(
+  scrollMetrics: ScrollMetrics,
+  processedCount: Long = 0,
+  errorCount: Long = 0,
+  skippedCount: Long = 0
+)
+
+client.scroll(query)
+  .scan(CustomMetrics(ScrollMetrics())) { case (custom, (doc, scrollMetrics)) =>
+    // Update custom metrics
+    val processed = if (processDocument(doc)) {
+      custom.copy(
+        scrollMetrics = scrollMetrics,
+        processedCount = custom.processedCount + 1
+      )
+    } else {
+      custom.copy(
+        scrollMetrics = scrollMetrics,
+        skippedCount = custom.skippedCount + 1
+      )
+    }
+    processed
+  }
+  .runWith(Sink.last)
+  .foreach { finalCustom =>
+    println(s"Processed: ${finalCustom.processedCount}")
+    println(s"Skipped: ${finalCustom.skippedCount}")
+    println(s"Errors: ${finalCustom.errorCount}")
+  }
+
+def processDocument(doc: Map[String, Any]): Boolean = {
+  // Processing logic
+  true
+}
+```
+
+---
+
+## Error Handling
+
+### Built-in Error Handling
+
+The API automatically handles:
+
+- ✅ Network timeouts (with retry)
+- ✅ Expired scroll contexts
+- ✅ Elasticsearch errors
+- ✅ Connection issues
+
+```scala
+// Automatic error handling is built-in
+client.scroll(query).runWith(Sink.seq).recover {
+  case ex: Exception =>
+    logger.error("Scroll failed", ex)
+    Seq.empty
+}
+```
+
+---
+
+### Custom Error Recovery
+
+```scala
+import akka.stream.{ActorAttributes, Supervision}
+
+// Define custom recovery strategy
+implicit val decider: Supervision.Decider = {
+  case _: java.net.SocketTimeoutException =>
+    logger.warn("Timeout, resuming...")
+    Supervision.Resume
+
+  case _: org.elasticsearch.ElasticsearchException =>
+    logger.error("ES error, stopping...")
+    Supervision.Stop
+
+  case ex =>
+    logger.error(s"Unexpected error: ${ex.getMessage}")
+    Supervision.Stop
+}
+
+// Apply supervision strategy
+client.scroll(query)
+  .withAttributes(ActorAttributes.supervisionStrategy(decider))
+  .runWith(Sink.seq)
+```
+
+---
+
+### Retry Logic
+
+```scala
+import akka.pattern.retry
+import scala.concurrent.ExecutionContext.Implicits.global
+import scala.concurrent.duration._
+
+implicit val scheduler: akka.actor.Scheduler = system.scheduler
+
+// Retry the processing of each document up to 3 times,
+// waiting one second between attempts
+client.scroll(query)
+  .mapAsync(parallelism = 1) { case (doc, _) =>
+    retry(() => Future(processDocument(doc)), attempts = 3, delay = 1.second)
+  }
+  .runWith(Sink.seq)
+```
+
+---
+
+### Error Logging
+
+```scala
+client.scroll(query)
+  .recover {
+    case ex: java.net.SocketTimeoutException =>
+      logger.error("Network timeout", ex)
+      throw ex
+
+    case ex: org.elasticsearch.ElasticsearchException =>
+      logger.error(s"Elasticsearch error: ${ex.getMessage}", ex)
+      throw ex
+
+    case ex: Exception =>
+      logger.error("Unexpected error during scroll", ex)
+      throw ex
+  }
+  .runWith(Sink.seq)
+  .recover {
+    case ex =>
+      logger.error("Failed to complete scroll", ex)
+      Seq.empty
+  }
+```
+
+---
+
+### Graceful Degradation
+
+```scala
+def scrollWithFallback(query: SQLQuery): Future[Seq[Map[String, Any]]] = {
+  client.scroll(query)
+    .map(_._1) // Extract documents
+    .runWith(Sink.seq)
+    .recoverWith {
+      case ex: Exception =>
+        
logger.warn(s"Scroll failed, trying regular search: ${ex.getMessage}") + + // Fallback to regular search + client.search(query).map { + case ElasticSuccess(results) => results + case ElasticFailure(error) => + logger.error(s"Fallback also failed: ${error.message}") + Seq.empty + } + } +} +``` + +--- + +## Performance Tuning + +### Optimal Batch Size + +```scala +// Small documents (< 1KB each) +val smallDocConfig = ScrollConfig(scrollSize = 5000) + +// Medium documents (1-10KB each) +val mediumDocConfig = ScrollConfig(scrollSize = 1000) + +// Large documents (> 10KB each) +val largeDocConfig = ScrollConfig(scrollSize = 100) + +// Choose based on document size +val config = if (avgDocSize < 1024) smallDocConfig + else if (avgDocSize < 10240) mediumDocConfig + else largeDocConfig + +client.scroll(query, config).runWith(Sink.seq) +``` + +--- + +### Parallel Processing + +```scala +// Process batches in parallel +client.scroll(query) + .grouped(1000) + .mapAsync(parallelism = 4) { batch => + Future { + // Parallel batch processing + processBatchInParallel(batch.map(_._1)) + } + } + .runWith(Sink.ignore) +``` + +--- + +### Memory Management + +```scala +// Stream to file to avoid memory issues +import java.io.PrintWriter + +val writer = new PrintWriter("results.json") + +client.scroll(query) + .map { case (doc, _) => + // Convert to JSON string + compact(render(Extraction.decompose(doc))) + } + .runWith(Sink.foreach { json => + writer.println(json) + }) + .onComplete { _ => + writer.close() + println("Results written to file") + } +``` + +--- + +### Backpressure Handling + +```scala +// Add buffer to handle backpressure +client.scroll(query) + .buffer(100, OverflowStrategy.backpressure) + .mapAsync(parallelism = 2) { case (doc, _) => + // Slow processing + processDocumentAsync(doc) + } + .runWith(Sink.seq) +``` + +--- + +### Throttling + +```scala +import scala.concurrent.duration._ + +// Throttle to 100 documents per second +client.scroll(query) + .throttle(100, 1.second) + .runWith(Sink.foreach { case (doc, metrics) => + println(s"Processing: ${doc.get("id")}") + }) +``` + +--- + +## Advanced Patterns + +### Batch Processing with Commit + +```scala +// Process in batches with commit points +client.scroll(query) + .grouped(1000) + .mapAsync(1) { batch => + for { + _ <- processBatch(batch.map(_._1)) + _ <- commitBatch(batch.size) + } yield batch.size + } + .runWith(Sink.fold(0)(_ + _)) + .foreach { total => + println(s"Processed and committed $total documents") + } + +def processBatch(docs: Seq[Map[String, Any]]): Future[Unit] = { + // Batch processing logic + Future.successful(()) +} + +def commitBatch(size: Int): Future[Unit] = { + // Commit logic (e.g., database transaction) + Future.successful(()) +} +``` + +--- + +### Data Transformation Pipeline + +```scala +case class RawProduct(id: String, name: String, price: Double) +case class EnrichedProduct(id: String, name: String, price: Double, category: String, tags: Seq[String]) + +client.scrollAs[RawProduct](query) + .mapAsync(parallelism = 4) { case (raw, _) => + // Enrich each product + enrichProduct(raw) + } + .filter(_.tags.nonEmpty) // Filter enriched products + .grouped(100) + .mapAsync(1) { batch => + // Bulk index enriched products + bulkIndexProducts(batch) + } + .runWith(Sink.ignore) + +def enrichProduct(raw: RawProduct): Future[EnrichedProduct] = { + // Enrichment logic (e.g., external API call) + Future.successful( + EnrichedProduct( + raw.id, + raw.name, + raw.price, + "electronics", + Seq("popular", "sale") + ) + ) +} + +def 
bulkIndexProducts(products: Seq[EnrichedProduct]): Future[Unit] = { + // Bulk indexing logic + Future.successful(()) +} +``` + +--- + +### Fan-Out Processing + +```scala +import akka.stream.scaladsl.Broadcast + +// Fan-out to multiple sinks +val graph = RunnableGraph.fromGraph(GraphDSL.create() { implicit builder => + import GraphDSL.Implicits._ + + val source = client.scroll(query).map(_._1) + val broadcast = builder.add(Broadcast[Map[String, Any]](3)) + + // Sink 1: Write to file + val fileSink = Sink.foreach[Map[String, Any]] { doc => + writeToFile(doc) + } + + // Sink 2: Index to another ES + val indexSink = Sink.foreach[Map[String, Any]] { doc => + indexToElasticsearch(doc) + } + + // Sink 3: Send to Kafka + val kafkaSink = Sink.foreach[Map[String, Any]] { doc => + sendToKafka(doc) + } + + source ~> broadcast + broadcast ~> fileSink + broadcast ~> indexSink + broadcast ~> kafkaSink + + ClosedShape +}) + +graph.run() + +def writeToFile(doc: Map[String, Any]): Unit = { /* ... */ } +def indexToElasticsearch(doc: Map[String, Any]): Unit = { /* ... */ } +def sendToKafka(doc: Map[String, Any]): Unit = { /* ... */ } +``` + +--- + +### Aggregating During Scroll + +```scala +case class Statistics( + count: Long = 0, + sum: Double = 0.0, + min: Double = Double.MaxValue, + max: Double = Double.MinValue +) { + def avg: Double = if (count > 0) sum / count else 0.0 + + def update(value: Double): Statistics = Statistics( + count = count + 1, + sum = sum + value, + min = math.min(min, value), + max = math.max(max, value) + ) +} + +client.scrollAs[Product](query) + .map(_._1.price) // Extract prices + .fold(Statistics())(_ update _) + .runWith(Sink.head) + .foreach { stats => + println(s"Price Statistics:") + println(f" Count: ${stats.count}") + println(f" Average: $$${stats.avg}%.2f") + println(f" Min: $$${stats.min}%.2f") + println(f" Max: $$${stats.max}%.2f") + } +``` + +--- + +### Conditional Processing + +```scala +client.scrollAs[Product](query) + .mapAsync(parallelism = 4) { case (product, _) => + product.category match { + case "electronics" => processElectronics(product) + case "clothing" => processClothing(product) + case "books" => processBooks(product) + case _ => processGeneric(product) + } + } + .runWith(Sink.ignore) + +def processElectronics(p: Product): Future[Unit] = Future.successful(()) +def processClothing(p: Product): Future[Unit] = Future.successful(()) +def processBooks(p: Product): Future[Unit] = Future.successful(()) +def processGeneric(p: Product): Future[Unit] = Future.successful(()) +``` + +--- + +## Testing + +### Test Basic Scrolling + +```scala +import org.scalatest.flatspec.AsyncFlatSpec +import org.scalatest.matchers.should.Matchers +import akka.stream.scaladsl.Sink + +class ScrollApiSpec extends AsyncFlatSpec with Matchers { + + implicit val system: ActorSystem = ActorSystem("test") + + "ScrollApi" should "scroll through all documents" in { + val testIndex = "test-scroll" + + for { + // Setup + _ <- client.createIndexAsync(testIndex) + _ <- Future.sequence((1 to 100).map { i => + client.indexAsync(testIndex, i.toString, s"""{"id": $i, "value": ${i * 10}}""") + }) + _ <- client.refreshAsync(testIndex) + + // Test + query = SQLQuery(query = s"SELECT * FROM $testIndex") + results <- client.scroll(query).map(_._1).runWith(Sink.seq) + + // Assertions + _ = { + results should have size 100 + results.map(_("id").toString.toInt).sorted shouldBe (1 to 100) + } + + // Cleanup + _ <- client.deleteIndexAsync(testIndex) + } yield succeed + } +} +``` + +--- + +### Test Typed 
Scrolling
+
+```scala
+"ScrollApi" should "scroll with type conversion" in {
+  case class TestDoc(id: Int, value: Int)
+  implicit val formats: Formats = DefaultFormats
+
+  val testIndex = "test-typed-scroll"
+
+  for {
+    // Setup
+    _ <- client.createIndexAsync(testIndex)
+    _ <- Future.sequence((1 to 50).map { i =>
+      client.indexAsync(testIndex, i.toString, s"""{"id": $i, "value": ${i * 10}}""")
+    })
+    _ <- client.refreshAsync(testIndex)
+
+    // Test
+    query = SQLQuery(query = s"SELECT id, value FROM $testIndex")
+    results <- client.scrollAs[TestDoc](query).map(_._1).runWith(Sink.seq)
+
+    // Assertions
+    _ = {
+      results should have size 50
+      results.map(_.id).sorted shouldBe (1 to 50)
+      results.foreach { doc =>
+        doc.value shouldBe doc.id * 10
+      }
+    }
+
+    // Cleanup
+    _ <- client.deleteIndexAsync(testIndex)
+  } yield succeed
+}
+```
+
+---
+
+### Test Metrics Tracking
+
+```scala
+"ScrollApi" should "track metrics correctly" in {
+  val testIndex = "test-metrics"
+
+  for {
+    // Setup
+    _ <- client.createIndexAsync(testIndex)
+    _ <- Future.sequence((1 to 1000).map { i =>
+      client.indexAsync(testIndex, i.toString, s"""{"id": $i}""")
+    })
+    _ <- client.refreshAsync(testIndex)
+
+    // Test
+    query = SQLQuery(query = s"SELECT * FROM $testIndex")
+    config = ScrollConfig(scrollSize = 100)
+
+    lastMetrics <- client.scroll(query, config)
+      .map(_._2)
+      .runWith(Sink.last)
+
+    // Assertions
+    _ = {
+      val finalMetrics = lastMetrics.complete
+      finalMetrics.totalDocuments shouldBe 1000
+      finalMetrics.totalBatches shouldBe 10
+      finalMetrics.duration should be > 0L
+      finalMetrics.documentsPerSecond should be > 0.0
+    }
+
+    // Cleanup
+    _ <- client.deleteIndexAsync(testIndex)
+  } yield succeed
+}
+```
+
+---
+
+### Test Max Documents Limit
+
+```scala
+"ScrollApi" should "respect maxDocuments limit" in {
+  val testIndex = "test-limit"
+
+  for {
+    // Setup
+    _ <- client.createIndexAsync(testIndex)
+    _ <- Future.sequence((1 to 1000).map { i =>
+      client.indexAsync(testIndex, i.toString, s"""{"id": $i}""")
+    })
+    _ <- client.refreshAsync(testIndex)
+
+    // Test with limit
+    query = SQLQuery(query = s"SELECT * FROM $testIndex")
+    config = ScrollConfig(
+      scrollSize = 100,
+      maxDocuments = Some(250)
+    )
+
+    results <- client.scroll(query, config).map(_._1).runWith(Sink.seq)
+
+    // Assertions
+    _ = {
+      results.size shouldBe 250
+    }
+
+    // Cleanup
+    _ <- client.deleteIndexAsync(testIndex)
+  } yield succeed
+}
+```
+
+---
+
+### Test Error Recovery
+
+```scala
+"ScrollApi" should "handle errors gracefully" in {
+  val testIndex = "test-error-recovery"
+
+  for {
+    // Setup
+    _ <- client.createIndexAsync(testIndex)
+    _ <- Future.sequence((1 to 100).map { i =>
+      client.indexAsync(testIndex, i.toString, s"""{"id": $i}""")
+    })
+    _ <- client.refreshAsync(testIndex)
+
+    // Test with error handling
+    query = SQLQuery(query = s"SELECT * FROM $testIndex")
+
+    result <- client.scroll(query)
+      .map(_._1)
+      .runWith(Sink.seq)
+      .recover {
+        case ex: Exception =>
+          // Should recover from errors
+          Seq.empty
+      }
+
+    // Assertions
+    _ = {
+      result should not be empty
+    }
+
+    // Cleanup
+    _ <- client.deleteIndexAsync(testIndex)
+  } yield succeed
+}
+```
+
+---
+
+### Test Empty Index
+
+```scala
+"ScrollApi" should "handle empty index" in {
+  val testIndex = "test-empty"
+
+  for {
+    // Setup empty index
+    _ <- client.createIndexAsync(testIndex)
+    _ <- client.refreshAsync(testIndex)
+
+    // Test
+    query = SQLQuery(query = s"SELECT * FROM $testIndex")
+    results <- client.scroll(query).map(_._1).runWith(Sink.seq)
+
+    // Assertions
+    _ = {
+      
results shouldBe empty + } + + // Cleanup + _ <- client.deleteIndexAsync(testIndex) + } yield succeed +} +``` + +--- + +### Test Batch Processing + +```scala +"ScrollApi" should "process documents in batches" in { + val testIndex = "test-batches" + + for { + // Setup + _ <- client.createIndexAsync(testIndex) + _ <- Future.sequence((1 to 500).map { i => + client.indexAsync(testIndex, i.toString, s"""{"id": $i}""") + }) + _ <- client.refreshAsync(testIndex) + + // Test + query = SQLQuery(query = s"SELECT * FROM $testIndex") + config = ScrollConfig(scrollSize = 100) + + batches <- client.scroll(query, config) + .map(_._1) + .grouped(100) + .runWith(Sink.seq) + + // Assertions + _ = { + batches should have size 5 + batches.foreach { batch => + batch should have size 100 + } + } + + // Cleanup + _ <- client.deleteIndexAsync(testIndex) + } yield succeed +} +``` + +--- + +## Best Practices + +### 1. Choose Appropriate Batch Size + +```scala +// ❌ BAD: Too small batch size (too many requests) +val badConfig = ScrollConfig(scrollSize = 10) + +// ❌ BAD: Too large batch size (memory issues) +val tooBigConfig = ScrollConfig(scrollSize = 50000) + +// ✅ GOOD: Reasonable batch size based on document size +val goodConfig = ScrollConfig( + scrollSize = if (avgDocumentSize < 1024) 5000 + else if (avgDocumentSize < 10240) 1000 + else 100 +) + +client.scroll(query, goodConfig).runWith(Sink.seq) +``` + +--- + +### 2. Always Set maxDocuments for Safety + +```scala +// ❌ BAD: No limit (could consume all memory) +client.scroll(query).runWith(Sink.seq) + +// ✅ GOOD: Set reasonable limit +val config = ScrollConfig( + scrollSize = 1000, + maxDocuments = Some(100000) // Safety limit +) + +client.scroll(query, config).runWith(Sink.seq) +``` + +--- + +### 3. Use Typed Scrolling When Possible + +```scala +case class Product(id: String, name: String, price: Double) + +// ❌ BAD: Manual type conversion +client.scroll(query).map { case (doc, _) => + Product( + doc("id").toString, + doc("name").toString, + doc("price").toString.toDouble + ) +}.runWith(Sink.seq) + +// ✅ GOOD: Automatic type conversion +implicit val formats: Formats = DefaultFormats +client.scrollAs[Product](query) + .map(_._1) + .runWith(Sink.seq) +``` + +--- + +### 4. Handle Backpressure + +```scala +// ✅ GOOD: Add buffer for backpressure handling +client.scroll(query) + .buffer(100, OverflowStrategy.backpressure) + .mapAsync(parallelism = 4) { case (doc, _) => + // Slow async processing + processDocumentAsync(doc) + } + .runWith(Sink.ignore) +``` + +--- + +### 5. Monitor Progress with Metrics + +```scala +// ✅ GOOD: Log progress regularly +val config = ScrollConfig( + scrollSize = 1000, + logEvery = 10 // Log every 10 batches +) + +client.scroll(query, config) + .runWith(Sink.foreach { case (doc, metrics) => + // Metrics are automatically logged + // Custom processing here + processDocument(doc) + }) +``` + +--- + +### 6. Clean Up Resources + +```scala +// ✅ GOOD: Ensure proper cleanup +val scrollFuture = client.scroll(query).runWith(Sink.seq) + +scrollFuture.onComplete { + case Success(results) => + logger.info(s"Scroll completed: ${results.size} documents") + // Cleanup is automatic + + case Failure(ex) => + logger.error("Scroll failed", ex) + // Cleanup is automatic even on failure +} +``` + +--- + +### 7. 
Use Appropriate Strategy + +```scala +// ✅ GOOD: Let the API choose the best strategy +val query = SQLQuery( + query = """ + SELECT id, name, price + FROM products + WHERE category = 'electronics' + ORDER BY price DESC + """ +) + +// Automatically uses: +// - PIT + search_after for ES 7.10+ (best performance) +// - search_after for ES < 7.10 +// - Classic scroll for aggregations + +client.scroll(query).runWith(Sink.seq) +``` + +--- + +### 8. Handle Large Result Sets Efficiently + +```scala +// ✅ GOOD: Stream to file instead of collecting in memory +import java.io.{BufferedWriter, FileWriter} + +val writer = new BufferedWriter(new FileWriter("results.jsonl")) + +client.scroll(query) + .map { case (doc, _) => + compact(render(Extraction.decompose(doc))) + } + .runWith(Sink.foreach { json => + writer.write(json) + writer.newLine() + }) + .onComplete { _ => + writer.close() + logger.info("Results written to file") + } +``` + +--- + +### 9. Implement Proper Error Handling + +```scala +// ✅ GOOD: Comprehensive error handling +implicit val decider: Supervision.Decider = { + case _: java.net.SocketTimeoutException => + logger.warn("Network timeout, resuming...") + Supervision.Resume + + case ex: org.elasticsearch.ElasticsearchException => + logger.error(s"ES error: ${ex.getMessage}") + Supervision.Stop + + case ex => + logger.error(s"Unexpected error: ${ex.getMessage}", ex) + Supervision.Stop +} + +client.scroll(query) + .withAttributes(ActorAttributes.supervisionStrategy(decider)) + .runWith(Sink.seq) + .recover { + case ex: Exception => + logger.error("Failed to complete scroll", ex) + Seq.empty + } +``` + +--- + +### 10. Optimize Queries + +```scala +// ❌ BAD: Select all fields (wastes bandwidth) +val badQuery = SQLQuery(query = "SELECT * FROM products") + +// ✅ GOOD: Select only needed fields +val goodQuery = SQLQuery( + query = """ + SELECT id, name, price + FROM products + WHERE category = 'electronics' + AND price > 100 + ORDER BY price DESC + """ +) + +client.scroll(goodQuery).runWith(Sink.seq) +``` + +--- + +### 11. Use Parallel Processing Wisely + +```scala +// ✅ GOOD: Balance parallelism with resources +val parallelism = Runtime.getRuntime.availableProcessors() + +client.scroll(query) + .mapAsync(parallelism) { case (doc, _) => + // Process documents in parallel + processDocumentAsync(doc) + } + .runWith(Sink.ignore) +``` + +--- + +### 12. 
Test Scroll Behavior + +```scala +// ✅ GOOD: Test with different scenarios +class ScrollBehaviorSpec extends AsyncFlatSpec with Matchers { + + "Scroll" should "work with small datasets" in { + testScroll(documentCount = 100) + } + + it should "work with large datasets" in { + testScroll(documentCount = 10000) + } + + it should "work with empty results" in { + testScroll(documentCount = 0) + } + + it should "respect maxDocuments limit" in { + testScrollWithLimit( + documentCount = 1000, + maxDocuments = 500 + ) + } + + def testScroll(documentCount: Int): Future[Assertion] = { + // Test implementation + Future.successful(succeed) + } + + def testScrollWithLimit( + documentCount: Int, + maxDocuments: Int + ): Future[Assertion] = { + // Test implementation + Future.successful(succeed) + } +} +``` + +--- + +## Common Patterns + +### Pattern 1: Export to File + +```scala +def exportToFile( + query: SQLQuery, + outputPath: String +): Future[Long] = { + val writer = new PrintWriter(new FileWriter(outputPath)) + + client.scroll(query) + .map { case (doc, _) => + compact(render(Extraction.decompose(doc))) + } + .runWith(Sink.fold(0L) { (count, json) => + writer.println(json) + count + 1 + }) + .andThen { + case _ => writer.close() + } +} + +// Usage +exportToFile( + SQLQuery(query = "SELECT * FROM products"), + "products.jsonl" +).foreach { count => + println(s"Exported $count documents") +} +``` + +--- + +### Pattern 2: Bulk Reindex + +```scala +def bulkReindex( + sourceQuery: SQLQuery, + targetIndex: String, + batchSize: Int = 1000 +): Future[Long] = { + client.scroll(sourceQuery) + .map(_._1) // Extract documents + .grouped(batchSize) + .mapAsync(1) { batch => + // Bulk index to target + val bulkRequest = batch.map { doc => + s"""{"index":{"_index":"$targetIndex"}} + |${compact(render(Extraction.decompose(doc)))} + |""".stripMargin + }.mkString + + client.bulkAsync(bulkRequest).map(_ => batch.size) + } + .runWith(Sink.fold(0L)(_ + _)) +} + +// Usage +bulkReindex( + SQLQuery(query = "SELECT * FROM old_products"), + "new_products" +).foreach { count => + println(s"Reindexed $count documents") +} +``` + +--- + +### Pattern 3: Data Validation + +```scala +case class ValidationResult( + valid: Long, + invalid: Long, + errors: Seq[String] +) + +def validateData(query: SQLQuery): Future[ValidationResult] = { + client.scrollAs[Product](query) + .map(_._1) + .runWith(Sink.fold(ValidationResult(0, 0, Seq.empty)) { (result, product) => + if (isValid(product)) { + result.copy(valid = result.valid + 1) + } else { + result.copy( + invalid = result.invalid + 1, + errors = result.errors :+ s"Invalid product: ${product.id}" + ) + } + }) +} + +def isValid(product: Product): Boolean = { + product.price > 0 && product.name.nonEmpty +} + +// Usage +validateData(SQLQuery(query = "SELECT * FROM products")).foreach { result => + println(s"Valid: ${result.valid}") + println(s"Invalid: ${result.invalid}") + if (result.errors.nonEmpty) { + println("Errors:") + result.errors.take(10).foreach(println) + } +} +``` + +--- + +### Pattern 4: Data Aggregation + +```scala +case class CategoryStats( + category: String, + count: Long, + totalValue: Double, + avgPrice: Double +) + +def aggregateByCategory(query: SQLQuery): Future[Map[String, CategoryStats]] = { + client.scrollAs[Product](query) + .map(_._1) + .runWith(Sink.fold(Map.empty[String, CategoryStats]) { (stats, product) => + val current = stats.getOrElse( + product.category, + CategoryStats(product.category, 0, 0.0, 0.0) + ) + + val updated = CategoryStats( + category = 
product.category, + count = current.count + 1, + totalValue = current.totalValue + product.price, + avgPrice = (current.totalValue + product.price) / (current.count + 1) + ) + + stats + (product.category -> updated) + }) +} + +// Usage +aggregateByCategory(SQLQuery(query = "SELECT * FROM products")).foreach { stats => + println("Category Statistics:") + stats.values.foreach { s => + println(s" ${s.category}:") + println(f" Count: ${s.count}") + println(f" Total Value: $$${s.totalValue}%,.2f") + println(f" Avg Price: $$${s.avgPrice}%.2f") + } +} +``` + +--- + +### Pattern 5: Data Transformation Pipeline + +```scala +case class RawOrder(id: String, customerId: String, total: Double, items: Seq[String]) +case class EnrichedOrder( + id: String, + customerId: String, + customerName: String, + total: Double, + itemCount: Int, + category: String +) + +def transformOrders(query: SQLQuery): Future[Seq[EnrichedOrder]] = { + client.scrollAs[RawOrder](query) + .map(_._1) + .mapAsync(parallelism = 4) { order => + // Enrich with customer data + fetchCustomerName(order.customerId).map { customerName => + EnrichedOrder( + id = order.id, + customerId = order.customerId, + customerName = customerName, + total = order.total, + itemCount = order.items.size, + category = categorizeOrder(order) + ) + } + } + .filter(_.itemCount > 0) // Filter empty orders + .runWith(Sink.seq) +} + +def fetchCustomerName(customerId: String): Future[String] = { + // Fetch from database or cache + Future.successful(s"Customer $customerId") +} + +def categorizeOrder(order: RawOrder): String = { + if (order.total > 1000) "premium" + else if (order.total > 100) "standard" + else "basic" +} + +// Usage +transformOrders(SQLQuery(query = "SELECT * FROM orders")).foreach { enriched => + println(s"Transformed ${enriched.size} orders") +} +``` + +--- + +## Summary + +The **Scroll API** provides: + +✅ **Automatic strategy selection** for optimal performance +✅ **Akka Streams integration** for reactive processing +✅ **Type-safe scrolling** with automatic deserialization +✅ **Built-in metrics tracking** for monitoring +✅ **Automatic error handling** with retry logic +✅ **Memory-efficient streaming** for large datasets +✅ **Flexible configuration** for different use cases + +**Key Features by Strategy:** + +| Feature | PIT + search_after | search_after | Classic Scroll | +|---------|-------------------|--------------|----------------| +| **ES Version** | 7.10+ | All | All | +| **Aggregations** | ❌ | ❌ | ✅ | +| **Consistent Snapshot** | ✅ | ❌ | ✅ | +| **Deep Pagination** | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐ | +| **Performance** | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐ | +| **Resource Usage** | Low | Low | Medium | +| **Timeout Issues** | ❌ | ❌ | ⚠️ | + +**When to Use:** + +- **PIT + search_after**: ES 7.10+, large datasets, no aggregations (recommended) +- **search_after**: ES < 7.10, large datasets, no aggregations +- **Classic scroll**: Any version with aggregations, or when consistent snapshot is required + +**Best Practices:** + +1. ✅ Choose appropriate batch size based on document size +2. ✅ Always set `maxDocuments` for safety +3. ✅ Use typed scrolling when possible +4. ✅ Handle backpressure with buffers +5. ✅ Monitor progress with metrics +6. ✅ Implement proper error handling +7. ✅ Stream to file for large result sets +8. ✅ Use parallel processing wisely +9. ✅ Optimize queries (select only needed fields) +10. 
✅ Test with different scenarios
+
+**Performance Tips:**
+
+- 📊 Small documents (< 1KB): batch size 5000
+- 📊 Medium documents (1-10KB): batch size 1000
+- 📊 Large documents (> 10KB): batch size 100
+- 🚀 Use parallelism = number of CPU cores
+- 💾 Stream to file for > 100K documents
+- ⏱️ Add throttling for rate-limited operations
+
+---
+
+[Back to index](README.md) | [Next: Aggregations API](aggregations.md)
diff --git a/documentation/client/search.md b/documentation/client/search.md
new file mode 100644
index 00000000..5c6b0ca3
--- /dev/null
+++ b/documentation/client/search.md
@@ -0,0 +1,2520 @@
+[Back to index](README.md)
+
+# SEARCH API
+
+## Table of Contents
+
+- [Core Concepts](#core-concepts)
+- [Synchronous Search](#synchronous-search)
+  - [search](#search)
+  - [singleSearch](#singlesearch)
+  - [multiSearch](#multisearch)
+- [Asynchronous Search](#asynchronous-search)
+  - [searchAsync](#searchasync)
+  - [singleSearchAsync](#singlesearchasync)
+  - [multiSearchAsync](#multisearchasync)
+- [Search with Type Conversion](#search-with-type-conversion)
+  - [searchAs](#searchas)
+  - [singleSearchAs](#singlesearchas)
+  - [multisearchAs](#multisearchas)
+- [Asynchronous Search with Type Conversion](#asynchronous-search-with-type-conversion)
+  - [searchAsyncAs](#searchasyncas)
+  - [singleSearchAsyncAs](#singlesearchasyncas)
+  - [multiSearchAsyncAs](#multisearchasyncas)
+- [Implementation Requirements](#implementation-requirements)
+  - [executeSingleSearch](#executesinglesearch)
+  - [executeMultiSearch](#executemultisearch)
+  - [executeSingleSearchAsync](#executesinglesearchasync)
+  - [executeMultiSearchAsync](#executemultisearchasync)
+- [Common Patterns](#common-patterns)
+- [Performance Optimization](#performance-optimization)
+- [Error Handling](#error-handling)
+- [Testing Scenarios](#testing-scenarios)
+- [Best Practices](#best-practices)
+- [SQL Query Search](#sql-query-search)
+- [SQL Query Patterns](#sql-query-patterns)
+- [SQL Query Best Practices](#best-practices-for-sql-queries)
+- [Summary](#summary)
+
+## Overview
+
+The **SearchApi** trait provides comprehensive search functionality for Elasticsearch with support for [SQL Queries](../sql/README.md), native Elasticsearch queries, aggregations, and automatic type conversion. It offers both synchronous and asynchronous operations with unified error handling via `ElasticResult`.
+
+**Features:**
+- **SQL query support** with automatic conversion to Elasticsearch DSL
+- **Native Elasticsearch query execution**
+- **Single and multi-search operations**
+- **Automatic type conversion** to Scala case classes
+- **Field aliasing** for query result mapping
+- **Aggregation support** (SQL and Elasticsearch)
+- **Synchronous and asynchronous operations**
+- **Comprehensive error handling and validation**
+- **Query validation** before execution
+
+**Dependencies:**
+- Requires `ElasticConversion` for type conversion
+- Requires `ElasticClientHelpers` for validation and utilities
+
+**Large Result Sets - Scroll API**
+
+For searching large datasets that don't fit in a single response, use the **dedicated Scroll API** instead of regular search methods.
+
+See the [Scroll API](scroll.md) documentation for the complete implementation.
+
+**When to use Scroll API:**
+- Retrieving more than 10,000 documents
+- Exporting large datasets
+- Processing all documents in an index
+- Batch processing operations
+
+---
+
+## Core Concepts
+
+### Query Types
+
+**1. 
SQL Query** +```scala +case class SQLQuery( + query: String, // SQL query string + score: Option[Double] = None // Optional minimum score +) + +// Example +val sqlQuery = SQLQuery( + query = "SELECT * FROM products WHERE price > 100", + score = Some(1.0) +) +``` + +**2. Elasticsearch Query** +```scala +case class ElasticQuery( + query: String, // JSON query + indices: Seq[String] // Target indices +) + +// Example +val elasticQuery = ElasticQuery( + query = """{"query": {"match": {"name": "laptop"}}}""", + indices = Seq("products") +) +``` + +**3. Multi-Search** +```scala +case class ElasticQueries( + queries: List[ElasticQuery] // Multiple queries +) +``` + +### Response Types + +```scala + +object AggregationType extends Enumeration { + type AggregationType = Value + val Count, Min, Max, Avg, Sum, FirstValue, LastValue, ArrayAgg = Value +} + +case class ClientAggregation( + aggName: String, // aggregation name + aggType: AggregationType.AggregationType, // aggregation type + distinct: Boolean // distinct values for multivalued aggregations +) { + def multivalued: Boolean = aggType == AggregationType.ArrayAgg + def singleValued: Boolean = !multivalued +} + +case class ElasticResponse( + query: String, // Original query + response: String, // Raw JSON response + fieldAliases: Map[String, String], // Field name mappings + aggregations: Map[String, ClientAggregation] // Aggregation definitions +) +``` + +--- + +## Synchronous Search + +### search + +Executes a search using an SQL query. + +**Signature:** + +```scala +def search(sql: SQLQuery): ElasticResult[ElasticResponse] +``` + +**Parameters:** +- `sql` - SQL query containing the search request + +**Returns:** +- `ElasticSuccess[ElasticResponse]` with search results +- `ElasticFailure` with error details (400 for invalid query) + +**Behavior:** +- Validates SQL query structure +- Converts SQL to Elasticsearch query +- Executes single or multi-search based on query type +- Returns raw Elasticsearch response + +**Examples:** + +```scala +// Basic SQL search +val sqlQuery = SQLQuery( + query = "SELECT * FROM products WHERE category = 'electronics'" +) + +client.search(sqlQuery) match { + case ElasticSuccess(response) => + println(s"✅ Found results: ${response.response}") + // Process raw JSON response + + case ElasticFailure(error) => + println(s"❌ Search failed: ${error.message}") +} + +// SQL with aggregations +val aggregationQuery = SQLQuery( + query = "SELECT category, AVG(price) FROM products GROUP BY category" +) + +client.search(aggregationQuery) match { + case ElasticSuccess(response) => + // Access aggregations + response.aggregations.foreach { case (name, agg) => + println(s"Aggregation: $name") + } + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} + +// Multi-search with SQL +val multiQuery = SQLQuery( + query = """ + SELECT * FROM products WHERE category = 'electronics' + UNION ALL + SELECT * FROM products WHERE category = 'books' + UNION ALL + SELECT * FROM products WHERE category = 'clothing' + """ +) + +client.search(multiQuery) match { + case ElasticSuccess(response) => + println(s"✅ Multi-search completed") + // Response contains combined results + + case ElasticFailure(error) => + println(s"❌ Multi-search failed: ${error.message}") +} + +// Error handling for invalid query +val invalidQuery = SQLQuery( + query = "INVALID SQL SYNTAX" +) + +client.search(invalidQuery) match { + case ElasticFailure(error) => + assert(error.message.contains("valid search request")) + 
assert(error.operation.contains("search")) +} + +// Monadic composition +val result = for { + response1 <- client.search(query1) + response2 <- client.search(query2) + combined = combineResponses(response1, response2) +} yield combined +``` + +--- + +### singleSearch + +Executes a single Elasticsearch query. + +**Signature:** + +```scala +def singleSearch( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] +): ElasticResult[ElasticResponse] +``` + +**Parameters:** +- `elasticQuery` - Elasticsearch query with target indices +- `fieldAliases` - Field name mappings for result conversion +- `aggregations` - Aggregation definitions for multivalued aggregations conversion + +**Returns:** +- `ElasticSuccess[ElasticResponse]` with search results +- `ElasticFailure` with error details (400 for invalid JSON) + +**Validation:** +- JSON syntax validation before execution + +**Examples:** + +```scala +// Basic search +val query = ElasticQuery( + query = """ + { + "query": { + "match": { + "name": "laptop" + } + } + } + """, + indices = Seq("products") +) + +client.singleSearch(query, Map.empty, Map.empty) match { + case ElasticSuccess(response) => + println(s"✅ Search completed in indices: ${query.indices.mkString(",")}") + + case ElasticFailure(error) => + println(s"❌ Search failed: ${error.message}") +} + +// Search with field aliases +val fieldAliases = Map( + "product_name" -> "name", + "product_price" -> "price" +) + +val queryWithAliases = ElasticQuery( + query = """{"query": {"match_all": {}}}""", + indices = Seq("products") +) + +client.singleSearch(queryWithAliases, fieldAliases, Map.empty) match { + case ElasticSuccess(response) => + // Field aliases applied to results + println(s"✅ Results with aliases: ${response.fieldAliases}") +} + +// Multi-index search +val multiIndexQuery = ElasticQuery( + query = """{"query": {"term": {"status": "active"}}}""", + indices = Seq("products", "inventory", "catalog") +) + +client.singleSearch(multiIndexQuery, Map.empty, Map.empty) + +// Complex query with filters +val complexQuery = ElasticQuery( + query = """ + { + "query": { + "bool": { + "must": [ + {"range": {"price": {"gte": 100, "lte": 1000}}}, + {"term": {"category": "electronics"}} + ], + "filter": [ + {"term": {"in_stock": true}} + ] + } + }, + "sort": [{"price": "asc"}], + "size": 100 + } + """, + indices = Seq("products") +) + +client.singleSearch(complexQuery, Map.empty, Map.empty) + +// Invalid JSON handling +val invalidQuery = ElasticQuery( + query = """{"query": INVALID_JSON}""", + indices = Seq("products") +) + +client.singleSearch(invalidQuery, Map.empty, Map.empty) match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.message.contains("Invalid query")) +} +``` + +--- + +### multiSearch + +Executes multiple Elasticsearch queries in a single request. 
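+
+Under the hood, this corresponds to Elasticsearch's `_msearch` endpoint (see the `MultiSearchRequest`-based implementation example further down). As a purely illustrative sketch (the client builds and sends this for you), the wire format is newline-delimited JSON, one header line and one body line per query:
+
+```scala
+// Illustrative only: the NDJSON body corresponding to a two-query multi-search.
+// Each query contributes a header line (target index) and a body line.
+val msearchBody: String =
+  """{"index":"products"}
+    |{"query":{"match":{"category":"electronics"}}}
+    |{"index":"orders"}
+    |{"query":{"match":{"status":"completed"}}}
+    |""".stripMargin
+```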
+ +**Signature:** + +```scala +def multiSearch( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] +): ElasticResult[ElasticResponse] +``` + +**Parameters:** +- `elasticQueries` - Multiple Elasticsearch queries +- `fieldAliases` - Field name mappings for result conversion +- `aggregations` - Aggregation definitions for multivalued aggregations conversion + +**Returns:** +- `ElasticSuccess[ElasticResponse]` with combined results +- `ElasticFailure` with error details (400 for invalid queries) + +**Validation:** +- Validates all queries before execution +- Returns errors for any invalid query + +**Examples:** + +```scala +// Basic multi-search +val queries = ElasticQueries( + queries = List( + ElasticQuery( + query = """{"query": {"match": {"category": "electronics"}}}""", + indices = Seq("products") + ), + ElasticQuery( + query = """{"query": {"match": {"category": "books"}}}""", + indices = Seq("products") + ), + ElasticQuery( + query = """{"query": {"match": {"status": "completed"}}}""", + indices = Seq("orders") + ) + ) +) + +client.multiSearch(queries, Map.empty, Map.empty) match { + case ElasticSuccess(response) => + println(s"✅ Multi-search completed with ${queries.queries.size} queries") + // Response contains combined results from all queries + + case ElasticFailure(error) => + println(s"❌ Multi-search failed: ${error.message}") +} + +// Multi-search across different indices +val crossIndexQueries = ElasticQueries( + queries = List( + ElasticQuery( + query = """{"query": {"term": {"user_id": "user-123"}}}""", + indices = Seq("orders") + ), + ElasticQuery( + query = """{"query": {"term": {"user_id": "user-123"}}}""", + indices = Seq("reviews") + ), + ElasticQuery( + query = """{"query": {"term": {"user_id": "user-123"}}}""", + indices = Seq("wishlist") + ) + ) +) + +client.multiSearch(crossIndexQueries, Map.empty, Map.empty) match { + case ElasticSuccess(response) => + println("✅ Retrieved user data from multiple indices") +} + +// Multi-search with aggregations +val aggregationQueries = ElasticQueries( + queries = List( + ElasticQuery( + query = """ + { + "query": {"match_all": {}}, + "aggs": {"avg_price": {"avg": {"field": "price"}}} + } + """, + indices = Seq("products") + ), + ElasticQuery( + query = """ + { + "query": {"match_all": {}}, + "aggs": {"total_orders": {"value_count": {"field": "order_id"}}} + } + """, + indices = Seq("orders") + ) + ) +) + +client.multiSearch(aggregationQueries, Map.empty, Map.empty) + +// Error handling for invalid queries +val mixedQueries = ElasticQueries( + queries = List( + ElasticQuery( + query = """{"query": {"match": {"name": "valid"}}}""", + indices = Seq("products") + ), + ElasticQuery( + query = """{"query": INVALID}""", + indices = Seq("products") + ) + ) +) + +client.multiSearch(mixedQueries, Map.empty, Map.empty) match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.message.contains("Invalid queries")) + // Contains information about which query failed +} +``` + +--- + +## Asynchronous Search + +### searchAsync + +Asynchronously executes a search using an SQL query. 
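+
+Production code should compose the returned `Future`, but tests and one-off scripts sometimes need to block on it; a standard-library sketch, reusing the `sqlQuery` defined in the examples below:
+
+```scala
+import scala.concurrent.Await
+import scala.concurrent.duration._
+
+// Blocking is acceptable in tests or scripts, not in production code paths.
+val blockedResult = Await.result(client.searchAsync(sqlQuery), 30.seconds)
+```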
+ +**Signature:** + +```scala +def searchAsync( + sqlQuery: SQLQuery +)(implicit ec: ExecutionContext): Future[ElasticResult[ElasticResponse]] +``` + +**Parameters:** +- `sqlQuery` - SQL query containing the search request +- `ec` - Implicit ExecutionContext + +**Returns:** +- `Future[ElasticResult[ElasticResponse]]` that completes with search results + +**Examples:** + +```scala +import scala.concurrent.ExecutionContext.Implicits.global + +// Basic async search +val sqlQuery = SQLQuery( + query = "SELECT * FROM products WHERE price > 100" +) + +client.searchAsync(sqlQuery).onComplete { + case Success(ElasticSuccess(response)) => + println(s"✅ Async search completed") + + case Success(ElasticFailure(error)) => + println(s"❌ Search failed: ${error.message}") + + case Failure(ex) => + println(s"❌ Future failed: ${ex.getMessage}") +} + +// Chained async operations +val result: Future[ElasticResult[CombinedData]] = for { + response1 <- client.searchAsync(query1) + response2 <- client.searchAsync(query2) + combined = combineResults(response1, response2) +} yield combined + +// Parallel async searches +val searches = List(query1, query2, query3) + +val futures = searches.map(query => client.searchAsync(query)) + +Future.sequence(futures).map { results => + results.foreach { + case ElasticSuccess(response) => println(s"✅ Success") + case ElasticFailure(error) => println(s"❌ Failed: ${error.message}") + } +} +``` + +--- + +### singleSearchAsync + +Asynchronously executes a single Elasticsearch query. + +**Signature:** + +```scala +def singleSearchAsync( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] +)(implicit ec: ExecutionContext): Future[ElasticResult[ElasticResponse]] +``` + +**Examples:** + +```scala +val query = ElasticQuery( + query = """{"query": {"match": {"name": "laptop"}}}""", + indices = Seq("products") +) + +client.singleSearchAsync(query, Map.empty, Map.empty).foreach { + case ElasticSuccess(response) => + println("✅ Async search completed") + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} +``` + +--- + +### multiSearchAsync + +Asynchronously executes multiple Elasticsearch queries. + +**Signature:** + +```scala +def multiSearchAsync( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] +)(implicit ec: ExecutionContext): Future[ElasticResult[ElasticResponse]] +``` + +**Examples:** + +```scala +val queries = ElasticQueries( + queries = List(query1, query2, query3) +) + +client.multiSearchAsync(queries, Map.empty, Map.empty).foreach { + case ElasticSuccess(response) => + println(s"✅ Multi-search completed with ${queries.queries.size} queries") + + case ElasticFailure(error) => + println(s"❌ Multi-search failed: ${error.message}") +} +``` + +--- + +## Search with Type Conversion + +### searchAs + +Searches and automatically converts results to typed entities using an SQL query. 
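+
+Conceptually, the conversion pulls each hit's `_source` out of the raw JSON response and deserializes it into `U`; the real logic lives in `ElasticConversion` and additionally applies field aliases. A minimal json4s sketch of the idea, using a hypothetical `extractHits` helper:
+
+```scala
+import org.json4s._
+import org.json4s.jackson.JsonMethods._
+
+// Hypothetical helper, for intuition only: extract every hit's _source from a
+// raw search response and convert it to U via json4s.
+def extractHits[U](rawResponse: String)(implicit m: Manifest[U], formats: Formats): Seq[U] =
+  (parse(rawResponse) \ "hits" \ "hits").children.map(hit => (hit \ "_source").extract[U])
+```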
+ +**Signature:** + +```scala +def searchAs[U]( + sqlQuery: SQLQuery +)(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] +``` + +**Parameters:** +- `sqlQuery` - SQL query +- `m` - Implicit Manifest for type information +- `formats` - Implicit JSON serialization formats + +**Returns:** +- `ElasticSuccess[Seq[U]]` with typed entities +- `ElasticFailure` with conversion or search errors + +**Examples:** + +```scala +import org.json4s.DefaultFormats + +implicit val formats: Formats = DefaultFormats + +// Domain model +case class Product( + id: String, + name: String, + price: Double, + category: String +) + +// Search and convert to typed entities +val sqlQuery = SQLQuery( + query = "SELECT * FROM products WHERE category = 'electronics'" +) + +client.searchAs[Product](sqlQuery) match { + case ElasticSuccess(products) => + println(s"✅ Found ${products.size} products") + products.foreach { product => + println(s"Product: ${product.name}, Price: ${product.price}") + } + + case ElasticFailure(error) => + println(s"❌ Search failed: ${error.message}") +} + +// Search with complex types +case class Order( + id: String, + userId: String, + items: List[OrderItem], + total: Double, + status: String +) + +case class OrderItem(productId: String, quantity: Int, price: Double) + +val orderQuery = SQLQuery( + query = "SELECT * FROM orders WHERE status = 'completed'" +) + +client.searchAs[Order](orderQuery) match { + case ElasticSuccess(orders) => + val totalRevenue = orders.map(_.total).sum + println(s"✅ Total revenue: $totalRevenue") + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} + +// Error handling for conversion failures +client.searchAs[Product](sqlQuery) match { + case ElasticFailure(error) if error.operation.contains("convertToEntities") => + println(s"❌ Type conversion failed: ${error.message}") + error.cause.foreach(ex => println(s"Cause: ${ex.getMessage}")) + + case ElasticFailure(error) => + println(s"❌ Search failed: ${error.message}") +} + +// Monadic composition with type conversion +val result: ElasticResult[List[EnrichedProduct]] = for { + products <- client.searchAs[Product](productQuery) + enriched = products.map(enrichProduct) +} yield enriched.toList +``` + +--- + +### singleSearchAs + +Searches and converts results to typed entities using an Elasticsearch query. 
+ +**Signature:** + +```scala +def singleSearchAs[U]( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] +)(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] +``` + +**Examples:** + +```scala +case class Product(id: String, name: String, price: Double) + +implicit val formats: Formats = DefaultFormats + +val query = ElasticQuery( + query = """ + { + "query": { + "range": { + "price": { + "gte": 100, + "lte": 1000 + } + } + } + } + """, + indices = Seq("products") +) + +client.singleSearchAs[Product](query, Map.empty, Map.empty) match { + case ElasticSuccess(products) => + println(s"✅ Found ${products.size} products in price range") + products.foreach(p => println(s"${p.name}: ${p.price}")) + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} + +// With field aliases for mapping +val fieldAliases = Map( + "product_name" -> "name", + "product_price" -> "price", + "product_id" -> "id" +) + +client.singleSearchAs[Product](query, fieldAliases, Map.empty) match { + case ElasticSuccess(products) => + println(s"✅ Converted with field aliases") +} +``` + +--- + +### multisearchAs + +Multi-search with automatic type conversion. + +**Signature:** + +```scala +def multisearchAs[U]( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] +)(implicit m: Manifest[U], formats: Formats): ElasticResult[Seq[U]] +``` + +**Examples:** + +```scala +case class Product(id: String, name: String, price: Double, category: String) + +implicit val formats: Formats = DefaultFormats + +val queries = ElasticQueries( + queries = List( + ElasticQuery( + query = """{"query": {"term": {"category": "electronics"}}}""", + indices = Seq("products") + ), + ElasticQuery( + query = """{"query": {"term": {"category": "books"}}}""", + indices = Seq("products") + ) + ) +) + +client.multisearchAs[Product](queries, Map.empty, Map.empty) match { + case ElasticSuccess(products) => + println(s"✅ Found ${products.size} products across categories") + val byCategory = products.groupBy(_.category) + byCategory.foreach { case (category, items) => + println(s"$category: ${items.size} items") + } + + case ElasticFailure(error) => + println(s"❌ Multi-search failed: ${error.message}") +} +``` + +--- + +## Asynchronous Search with Type Conversion + +### searchAsyncAs + +Asynchronously searches and converts results to typed entities. 
+ +**Signature:** + +```scala +def searchAsyncAs[U]( + sqlQuery: SQLQuery +)(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats +): Future[ElasticResult[Seq[U]]] +``` + +**Examples:** + +```scala +import scala.concurrent.ExecutionContext.Implicits.global + +case class Product(id: String, name: String, price: Double) + +implicit val formats: Formats = DefaultFormats + +val sqlQuery = SQLQuery( + query = "SELECT * FROM products WHERE price > 100" +) + +client.searchAsyncAs[Product](sqlQuery).onComplete { + case Success(ElasticSuccess(products)) => + println(s"✅ Found ${products.size} products") + products.foreach(p => println(s"${p.name}: ${p.price}")) + + case Success(ElasticFailure(error)) => + println(s"❌ Search failed: ${error.message}") + + case Failure(ex) => + println(s"❌ Future failed: ${ex.getMessage}") +} + +// Chained async operations with type conversion +val result: Future[ElasticResult[Summary]] = for { + products <- client.searchAsyncAs[Product](productQuery) + orders <- client.searchAsyncAs[Order](orderQuery) + summary = createSummary(products, orders) +} yield summary + +// Parallel async searches with conversion +val futures = List( + client.searchAsyncAs[Product](query1), + client.searchAsyncAs[Order](query2), + client.searchAsyncAs[User](query3) +) + +Future.sequence(futures).map { results => + results.foreach { + case ElasticSuccess(items) => println(s"✅ Found ${items.size} items") + case ElasticFailure(error) => println(s"❌ Failed: ${error.message}") + } +} +``` + +--- + +### singleSearchAsyncAs + +Asynchronously searches and converts using an Elasticsearch query. + +**Signature:** + +```scala +def singleSearchAsyncAs[U]( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] +)(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats +): Future[ElasticResult[Seq[U]]] +``` + +**Examples:** + +```scala +val query = ElasticQuery( + query = """{"query": {"match": {"category": "electronics"}}}""", + indices = Seq("products") +) + +client.singleSearchAsyncAs[Product](query, Map.empty, Map.empty).foreach { + case ElasticSuccess(products) => + println(s"✅ Found ${products.size} electronics") + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} +``` + +--- + +### multiSearchAsyncAs + +Asynchronously executes multi-search with type conversion. 
+ +**Signature:** + +```scala +def multiSearchAsyncAs[U]( + elasticQueries: ElasticQueries, + fieldAliases: Map[String, String], + aggregations: Map[String, ClientAggregation] +)(implicit + m: Manifest[U], + ec: ExecutionContext, + formats: Formats +): Future[ElasticResult[Seq[U]]] +``` + +**Examples:** + +```scala +val queries = ElasticQueries( + queries = List(query1, query2, query3) +) + +client.multiSearchAsyncAs[Product](queries, Map.empty, Map.empty).foreach { + case ElasticSuccess(products) => + println(s"✅ Multi-search returned ${products.size} products") + + case ElasticFailure(error) => + println(s"❌ Multi-search failed: ${error.message}") +} +``` + +--- + +## Implementation Requirements + +### executeSingleSearch + +```scala +private[client] def executeSingleSearch( + elasticQuery: ElasticQuery +): ElasticResult[Option[String]] +``` + +**Implementation Example:** + +```scala +private[client] def executeSingleSearch( + elasticQuery: ElasticQuery +): ElasticResult[Option[String]] = { + executeRestAction[SearchResponse, Option[String]]( + operation = "search", + index = Some(elasticQuery.indices.mkString(",")) + )( + action = { + val request = new SearchRequest(elasticQuery.indices: _*) + request.source(new SearchSourceBuilder().query( + QueryBuilders.wrapperQuery(elasticQuery.query) + )) + client.search(request, RequestOptions.DEFAULT) + } + )( + transformer = resp => Some(resp.toString) + ) +} +``` + +--- + +### executeMultiSearch + +```scala +private[client] def executeMultiSearch( + elasticQueries: ElasticQueries +): ElasticResult[Option[String]] +``` + +**Implementation Example:** + +```scala +private[client] def executeMultiSearch( + elasticQueries: ElasticQueries +): ElasticResult[Option[String]] = { + executeRestAction[MultiSearchResponse, Option[String]]( + operation = "multiSearch", + index = None + )( + action = { + val request = new MultiSearchRequest() + + elasticQueries.queries.foreach { query => + val searchRequest = new SearchRequest(query.indices: _*) + searchRequest.source(new SearchSourceBuilder().query( + QueryBuilders.wrapperQuery(query.query) + )) + request.add(searchRequest) + } + + client.msearch(request, RequestOptions.DEFAULT) + } + )( + transformer = resp => Some(resp.toString) + ) +} +``` + +--- + +### executeSingleSearchAsync + +```scala +private[client] def executeSingleSearchAsync( + elasticQuery: ElasticQuery +)(implicit ec: ExecutionContext): Future[ElasticResult[Option[String]]] +``` + +**Implementation Example:** + +```scala +private[client] def executeSingleSearchAsync( + elasticQuery: ElasticQuery +)(implicit ec: ExecutionContext): Future[ElasticResult[Option[String]]] = { + val promise = Promise[ElasticResult[Option[String]]]() + + val request = new SearchRequest(elasticQuery.indices: _*) + request.source(new SearchSourceBuilder().query( + QueryBuilders.wrapperQuery(elasticQuery.query) + )) + + client.searchAsync( + request, + RequestOptions.DEFAULT, + new ActionListener[SearchResponse] { + override def onResponse(response: SearchResponse): Unit = { + promise.success(ElasticSuccess(Some(response.toString))) + } + + override def onFailure(e: Exception): Unit = { + promise.success(ElasticFailure(ElasticError( + message = s"Async search failed: ${e.getMessage}", + operation = Some("searchAsync"), + index = Some(elasticQuery.indices.mkString(",")), + cause = Some(e) + ))) + } + } + ) + + promise.future +} +``` + +--- + +### executeMultiSearchAsync + +```scala +private[client] def executeMultiSearchAsync( + elasticQueries: ElasticQueries +)(implicit 
ec: ExecutionContext): Future[ElasticResult[Option[String]]]
+```
+
+**Implementation Example:**
+
+```scala
+private[client] def executeMultiSearchAsync(
+  elasticQueries: ElasticQueries
+)(implicit ec: ExecutionContext): Future[ElasticResult[Option[String]]] = {
+  val promise = Promise[ElasticResult[Option[String]]]()
+
+  val request = new MultiSearchRequest()
+  elasticQueries.queries.foreach { query =>
+    val searchRequest = new SearchRequest(query.indices: _*)
+    searchRequest.source(new SearchSourceBuilder().query(
+      QueryBuilders.wrapperQuery(query.query)
+    ))
+    request.add(searchRequest)
+  }
+
+  client.msearchAsync(
+    request,
+    RequestOptions.DEFAULT,
+    new ActionListener[MultiSearchResponse] {
+      override def onResponse(response: MultiSearchResponse): Unit = {
+        promise.success(ElasticSuccess(Some(response.toString)))
+      }
+
+      override def onFailure(e: Exception): Unit = {
+        promise.success(ElasticFailure(ElasticError(
+          message = s"Async multi-search failed: ${e.getMessage}",
+          operation = Some("multiSearchAsync"),
+          cause = Some(e)
+        )))
+      }
+    }
+  )
+
+  promise.future
+}
+```
+
+---
+
+### sqlSearchRequestToJsonQuery
+
+```scala
+private[client] implicit def sqlSearchRequestToJsonQuery(
+  sqlSearch: SQLSearchRequest
+): String
+```
+
+**Implementation Example:**
+
+```scala
+private[client] implicit def sqlSearchRequestToJsonQuery(
+  sqlSearch: SQLSearchRequest
+): String = {
+  import org.json4s._
+  import org.json4s.JsonDSL._ // required for the ~ and -> JSON builders
+  import org.json4s.jackson.JsonMethods._
+
+  implicit val formats: Formats = DefaultFormats
+
+  // Convert SQL search request to Elasticsearch JSON query
+  val queryJson = ("query" -> sqlSearch.query) ~
+    ("size" -> sqlSearch.size) ~
+    ("from" -> sqlSearch.from)
+
+  compact(render(queryJson))
+}
+```
+
+---
+
+## Common Patterns
+
+### Repository Pattern with Search
+
+```scala
+trait SearchRepository[T] extends ElasticClientDelegator {
+  implicit val formats: Formats = DefaultFormats
+
+  def findAll(implicit
+    m: Manifest[T]
+  ): ElasticResult[Seq[T]] = {
+    val indexName = m.runtimeClass.getSimpleName.toLowerCase
+    searchAs[T](SQLQuery(s"SELECT * FROM $indexName"))
+  }
+
+  def findById(id: String)(implicit
+    m: Manifest[T]
+  ): ElasticResult[Option[T]] = {
+    val indexName = m.runtimeClass.getSimpleName.toLowerCase
+    val query = ElasticQuery(
+      query = s"""{"query": {"term": {"_id": "$id"}}}""",
+      indices = Seq(indexName)
+    )
+    singleSearchAs[T](query, Map.empty, Map.empty).map(_.headOption)
+  }
+
+  def search(query: SQLQuery)(implicit
+    m: Manifest[T]
+  ): ElasticResult[Seq[T]] =
+    searchAs[T](query)
+}
+
+// Usage
+case class Product(id: String, name: String, price: Double, category: String)
+
+object ProductRepository extends SearchRepository[Product] {
+  lazy val delegate: ElasticClientApi = ElasticClientFactory.create()
+
+  def findByCategory(category: String): ElasticResult[Seq[Product]] = {
+    search(SQLQuery(s"SELECT * FROM product WHERE category = '$category'"))
+  }
+
+  def findByPriceRange(min: Double, max: Double): ElasticResult[Seq[Product]] = {
+    search(SQLQuery(s"SELECT * FROM product WHERE price BETWEEN $min AND $max"))
+  }
+}
+
+// Using the repository
+ProductRepository.findByCategory("electronics") match {
+  case ElasticSuccess(products) =>
+    println(s"Found ${products.size} electronics")
+  case ElasticFailure(error) =>
+    println(s"Error: ${error.message}")
+}
+```
+
+---
+
+### Pagination Pattern
+
+```scala
+case class Page[T](
+  items: Seq[T],
+  total: Long,
+  page: Int,
+  pageSize: Int
+) {
+  
def totalPages: Int = Math.ceil(total.toDouble / pageSize).toInt + def hasNext: Boolean = page < totalPages + def hasPrevious: Boolean = page > 1 +} + +def searchWithPagination[T]( + query: String, + page: Int = 1, + pageSize: Int = 20 +)(implicit + client: ElasticClient, + m: Manifest[T], + formats: Formats +): ElasticResult[Page[T]] = { + + val from = (page - 1) * pageSize + val indexName = m.runtimeClass.getSimpleName.toLowerCase + + val elasticQuery = ElasticQuery( + query = s""" + { + "query": $query, + "from": $from, + "size": $pageSize + } + """, + indices = Seq(indexName) + ) + + for { + response <- client.singleSearch(elasticQuery, Map.empty, Map.empty) + entities <- client.convertToEntities[T](response) + total = extractTotal(response.response) + } yield Page(entities, total, page, pageSize) +} + +// Usage +searchWithPagination[Product]( + query = """{"match": {"category": "electronics"}}""", + page = 1, + pageSize = 20 +) match { + case ElasticSuccess(page) => + println(s"Page ${page.page} of ${page.totalPages}") + println(s"Items: ${page.items.size} / ${page.total}") + page.items.foreach(println) + + case ElasticFailure(error) => + println(s"Error: ${error.message}") +} +``` + +--- + +### Full-Text Search Pattern + +```scala +def fullTextSearch[T]( + searchText: String, + fields: Seq[String], + fuzzy: Boolean = false, + boost: Map[String, Double] = Map.empty +)(implicit + client: ElasticClient, + m: Manifest[T], + formats: Formats +): ElasticResult[Seq[T]] = { + + val indexName = m.runtimeClass.getSimpleName.toLowerCase + + val fieldQueries = fields.map { field => + val boostValue = boost.getOrElse(field, 1.0) + val matchType = if (fuzzy) "match" else "match_phrase" + s""" + { + "$matchType": { + "$field": { + "query": "$searchText", + "boost": $boostValue + } + } + } + """ + }.mkString(",") + + val query = ElasticQuery( + query = s""" + { + "query": { + "bool": { + "should": [$fieldQueries], + "minimum_should_match": 1 + } + } + } + """, + indices = Seq(indexName) + ) + + client.singleSearchAs[T](query, Map.empty, Map.empty) +} + +// Usage +fullTextSearch[Product]( + searchText = "wireless bluetooth headphones", + fields = Seq("name", "description", "tags"), + fuzzy = true, + boost = Map( + "name" -> 3.0, + "description" -> 1.0, + "tags" -> 2.0 + ) +) match { + case ElasticSuccess(products) => + println(s"Found ${products.size} matching products") + products.foreach(p => println(s"${p.name} - ${p.price}")) + + case ElasticFailure(error) => + println(s"Search failed: ${error.message}") +} +``` + +--- + +### Filter and Sort Pattern + +```scala +case class SearchCriteria( + filters: Map[String, Any] = Map.empty, + rangeFilters: Map[String, (Option[Double], Option[Double])] = Map.empty, + sortBy: Option[String] = None, + sortOrder: String = "asc", + size: Int = 100 +) + +def advancedSearch[T]( + criteria: SearchCriteria +)(implicit + client: ElasticClient, + m: Manifest[T], + formats: Formats +): ElasticResult[Seq[T]] = { + + val indexName = m.runtimeClass.getSimpleName.toLowerCase + + // Build filter clauses + val termFilters = criteria.filters.map { case (field, value) => + s"""{"term": {"$field": "$value"}}""" + }.mkString(",") + + val rangeFilters = criteria.rangeFilters.map { case (field, (min, max)) => + val minClause = min.map(v => s""""gte": $v""").getOrElse("") + val maxClause = max.map(v => s""""lte": $v""").getOrElse("") + val clauses = Seq(minClause, maxClause).filter(_.nonEmpty).mkString(",") + s"""{"range": {"$field": {$clauses}}}""" + }.mkString(",") + + val 
allFilters = Seq(termFilters, rangeFilters) + .filter(_.nonEmpty) + .mkString(",") + + // Build sort clause + val sortClause = criteria.sortBy.map { field => + s""""sort": [{"$field": "${criteria.sortOrder}"}]""" + }.getOrElse("") + + val query = ElasticQuery( + query = s""" + { + "query": { + "bool": { + "filter": [$allFilters] + } + }, + $sortClause, + "size": ${criteria.size} + } + """, + indices = Seq(indexName) + ) + + client.singleSearchAs[T](query, Map.empty, Map.empty) +} + +// Usage +val criteria = SearchCriteria( + filters = Map( + "category" -> "electronics", + "brand" -> "Sony" + ), + rangeFilters = Map( + "price" -> (Some(100.0), Some(500.0)), + "rating" -> (Some(4.0), None) + ), + sortBy = Some("price"), + sortOrder = "asc", + size = 50 +) + +advancedSearch[Product](criteria) match { + case ElasticSuccess(products) => + println(s"Found ${products.size} products matching criteria") + products.foreach(p => println(s"${p.name}: ${p.price}")) + + case ElasticFailure(error) => + println(s"Search failed: ${error.message}") +} +``` + +--- + +## Performance Optimization + +### Query Caching + +```scala +import scala.collection.concurrent.TrieMap + +class CachedSearchApi(client: ElasticClient) { + private val cache = TrieMap[String, (ElasticResponse, Long)]() + private val cacheTTL = 5 * 60 * 1000 // 5 minutes + + def searchWithCache( + query: ElasticQuery, + fieldAliases: Map[String, String] = Map.empty, + aggregations: Map[String, ClientAggregation] = Map.empty + ): ElasticResult[ElasticResponse] = { + + val cacheKey = s"${query.indices.mkString(",")}:${query.query}" + val now = System.currentTimeMillis() + + cache.get(cacheKey) match { + case Some((response, timestamp)) if (now - timestamp) < cacheTTL => + logger.debug(s"✅ Cache hit for query: $cacheKey") + ElasticResult.success(response) + + case _ => + logger.debug(s"❌ Cache miss for query: $cacheKey") + client.singleSearch(query, fieldAliases, aggregations) match { + case success @ ElasticSuccess(response) => + cache.put(cacheKey, (response, now)) + success + case failure => failure + } + } + } + + def clearCache(): Unit = cache.clear() + + def removeCacheEntry(query: ElasticQuery): Unit = { + val cacheKey = s"${query.indices.mkString(",")}:${query.query}" + cache.remove(cacheKey) + } +} + +// Usage +val cachedSearch = new CachedSearchApi(client) + +// First call - hits Elasticsearch +cachedSearch.searchWithCache(query) + +// Second call - returns cached result +cachedSearch.searchWithCache(query) +``` + +--- + +## Error Handling + +### Query Validation Errors + +```scala +// Invalid JSON query +val invalidQuery = ElasticQuery( + query = """{"query": INVALID_JSON}""", + indices = Seq("products") +) + +client.singleSearch(invalidQuery, Map.empty, Map.empty) match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.message.contains("Invalid query")) + assert(error.operation.contains("search")) + assert(error.index.contains("products")) +} +``` + +--- + +### Type Conversion Errors + +```scala +case class Product(id: String, name: String, price: Double) + +val query = ElasticQuery( + query = """{"query": {"match_all": {}}}""", + indices = Seq("products") +) + +client.singleSearchAs[Product](query, Map.empty, Map.empty) match { + case ElasticFailure(error) if error.operation.contains("convertToEntities") => + println(s"❌ Type conversion failed: ${error.message}") + println(s"Target type: Product") + error.cause.foreach { ex => + println(s"Cause: ${ex.getMessage}") + println(s"Stack trace: 
${ex.getStackTrace.mkString("\n")}") + } + + case ElasticFailure(error) => + println(s"❌ Search failed: ${error.message}") +} +``` + +--- + +### Multi-Search Partial Failures + +```scala +val queries = ElasticQueries( + queries = List( + ElasticQuery( + query = """{"query": {"match": {"name": "valid"}}}""", + indices = Seq("products") + ), + ElasticQuery( + query = """{"query": INVALID}""", + indices = Seq("products") + ), + ElasticQuery( + query = """{"query": {"match": {"name": "also-valid"}}}""", + indices = Seq("products") + ) + ) +) + +client.multiSearch(queries, Map.empty, Map.empty) match { + case ElasticFailure(error) => + // Error message contains information about all invalid queries + assert(error.message.contains("Invalid queries")) + assert(error.statusCode.contains(400)) + println(s"Failed queries: ${error.message}") +} +``` + +--- + +## Testing Scenarios + +### Test Basic Search + +```scala +def testBasicSearch()(implicit client: ElasticClient): Unit = { + val testIndex = "test-search" + + // Setup: Index test documents + client.createIndex(testIndex) + client.index(testIndex, "1", """{"name": "Product 1", "price": 100}""") + client.index(testIndex, "2", """{"name": "Product 2", "price": 200}""") + client.refresh(testIndex) + + // Test search + val query = ElasticQuery( + query = """{"query": {"match_all": {}}}""", + indices = Seq(testIndex) + ) + + client.singleSearch(query, Map.empty, Map.empty) match { + case ElasticSuccess(response) => + assert(response.query.contains("match_all")) + println("✅ Basic search test passed") + + case ElasticFailure(error) => + throw new AssertionError(s"Search failed: ${error.message}") + } + + // Cleanup + client.deleteIndex(testIndex) +} +``` + +--- + +### Test Search with Type Conversion + +```scala +def testSearchWithConversion()(implicit client: ElasticClient): Unit = { + case class TestProduct(name: String, price: Double) + + implicit val formats: Formats = DefaultFormats + + val testIndex = "test-conversion" + + // Setup + client.createIndex(testIndex) + client.index(testIndex, "1", """{"name": "Laptop", "price": 999.99}""") + client.index(testIndex, "2", """{"name": "Mouse", "price": 29.99}""") + client.refresh(testIndex) + + // Test + val query = ElasticQuery( + query = """{"query": {"match_all": {}}}""", + indices = Seq(testIndex) + ) + + client.singleSearchAs[TestProduct](query, Map.empty, Map.empty) match { + case ElasticSuccess(products) => + assert(products.size == 2, s"Expected 2 products, got ${products.size}") + assert(products.exists(_.name == "Laptop")) + assert(products.exists(_.name == "Mouse")) + println("✅ Search with conversion test passed") + + case ElasticFailure(error) => + throw new AssertionError(s"Search failed: ${error.message}") + } + + // Cleanup + client.deleteIndex(testIndex) +} +``` + +--- + +### Test Multi-Search + +```scala +def testMultiSearch()(implicit client: ElasticClient): Unit = { + val testIndex = "test-multi-search" + + // Setup + client.createIndex(testIndex) + client.index(testIndex, "1", """{"category": "electronics", "name": "Laptop"}""") + client.index(testIndex, "2", """{"category": "books", "name": "Novel"}""") + client.refresh(testIndex) + + // Test + val queries = ElasticQueries( + queries = List( + ElasticQuery( + query = """{"query": {"term": {"category": "electronics"}}}""", + indices = Seq(testIndex) + ), + ElasticQuery( + query = """{"query": {"term": {"category": "books"}}}""", + indices = Seq(testIndex) + ) + ) + ) + + client.multiSearch(queries, Map.empty, Map.empty) match { + 
case ElasticSuccess(response) => + println("✅ Multi-search test passed") + + case ElasticFailure(error) => + throw new AssertionError(s"Multi-search failed: ${error.message}") + } + + // Cleanup + client.deleteIndex(testIndex) +} +``` + +--- + +### Test Async Search + +```scala +def testAsyncSearch()(implicit + client: ElasticClient, + ec: ExecutionContext +): Future[Unit] = { + val testIndex = "test-async-search" + + for { + // Setup + _ <- client.createIndexAsync(testIndex) + _ <- client.indexAsync(testIndex, "1", """{"name": "Test Product"}""") + _ <- client.refreshAsync(testIndex) + + // Test + query = ElasticQuery( + query = """{"query": {"match_all": {}}}""", + indices = Seq(testIndex) + ) + result <- client.singleSearchAsync(query, Map.empty, Map.empty) + + _ = result match { + case ElasticSuccess(response) => + println("✅ Async search test passed") + case ElasticFailure(error) => + throw new AssertionError(s"Async search failed: ${error.message}") + } + + // Cleanup + _ <- client.deleteIndexAsync(testIndex) + } yield () +} +``` + +--- + +## Best Practices + +**1. Use Type-Safe Search Methods** + +```scala +// ✅ Good - type-safe with automatic conversion +case class Product(id: String, name: String, price: Double) + +client.singleSearchAs[Product](query, Map.empty, Map.empty) match { + case ElasticSuccess(products) => products.foreach(println) + case ElasticFailure(error) => println(error.message) +} + +// ❌ Avoid - manual JSON parsing +client.singleSearch(query, Map.empty, Map.empty) match { + case ElasticSuccess(response) => + // Manual JSON parsing required + val json = parse(response.response) + // Error-prone extraction +} +``` + +--- + +**2. Validate Queries Before Execution** + +```scala +// ✅ Good - validation happens automatically +client.singleSearch(query, Map.empty, Map.empty) + +// ✅ Good - additional custom validation +def validateAndSearch(query: ElasticQuery): ElasticResult[ElasticResponse] = { + if (query.indices.isEmpty) { + return ElasticResult.failure(ElasticError( + message = "No indices specified", + statusCode = Some(400) + )) + } + + client.singleSearch(query, Map.empty, Map.empty) +} +``` + +--- + +**3. Use Async for Multiple Searches** + +```scala +// ✅ Good - parallel async searches +val futures = List(query1, query2, query3).map { query => + client.singleSearchAsync(query, Map.empty, Map.empty) +} + +Future.sequence(futures).map { results => + results.foreach { + case ElasticSuccess(response) => println("Success") + case ElasticFailure(error) => println(s"Failed: ${error.message}") + } +} + +// ❌ Avoid - sequential blocking searches +val result1 = client.singleSearch(query1, Map.empty, Map.empty) +val result2 = client.singleSearch(query2, Map.empty, Map.empty) +val result3 = client.singleSearch(query3, Map.empty, Map.empty) +``` + +--- + +**4. Handle Field Aliases Properly** + +```scala +// ✅ Good - use field aliases for mapping +val fieldAliases = Map( + "product_name" -> "name", + "product_price" -> "price", + "product_id" -> "id" +) + +client.singleSearchAs[Product](query, fieldAliases, Map.empty) + +// ❌ Avoid - expecting exact field names +client.singleSearchAs[Product](query, Map.empty, Map.empty) +// May fail if ES field names don't match case class fields +``` + +--- + +**5. 
Use Multi-Search for Related Queries** + +```scala +// ✅ Good - single multi-search request +val queries = ElasticQueries( + queries = List(query1, query2, query3) +) +client.multiSearch(queries, Map.empty, Map.empty) + +// ❌ Avoid - multiple single searches +client.singleSearch(query1, Map.empty, Map.empty) +client.singleSearch(query2, Map.empty, Map.empty) +client.singleSearch(query3, Map.empty, Map.empty) +``` + +--- + +## SQL Query Search + +### Overview + +The **SQL Query Search** is a key feature of this API, allowing you to query Elasticsearch using familiar SQL syntax instead of complex JSON DSL. The API automatically converts SQL queries to Elasticsearch queries. + +**Benefits:** +- **Familiar syntax** for developers with SQL background +- **Simpler queries** compared to Elasticsearch JSON DSL +- **Automatic conversion** to optimized Elasticsearch queries +- **Full support** for WHERE, ORDER BY, LIMIT, GROUP BY, HAVING, UNNEST, and aggregations +- **Type-safe results** with automatic conversion to Scala case classes + +### SQL Query Examples + +**Basic SELECT** + +```scala +case class Product(id: String, name: String, price: Double, category: String) + +implicit val formats: Formats = DefaultFormats + +// Simple SELECT with WHERE clause +val query1 = SQLQuery( + query = "SELECT * FROM products WHERE category = 'electronics'" +) + +client.searchAs[Product](query1) match { + case ElasticSuccess(products) => + println(s"✅ Found ${products.size} electronics") + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} + +// Multiple conditions +val query2 = SQLQuery( + query = """ + SELECT * FROM products + WHERE category = 'electronics' + AND price > 100 + AND price < 1000 + """ +) + +client.searchAs[Product](query2) + +// OR conditions +val query3 = SQLQuery( + query = """ + SELECT * FROM products + WHERE category = 'electronics' OR category = 'computers' + """ +) + +client.searchAs[Product](query3) + +// IN clause +val query4 = SQLQuery( + query = """ + SELECT * FROM products + WHERE category IN ('electronics', 'computers', 'phones') + """ +) + +client.searchAs[Product](query4) + +// LIKE for pattern matching +val query5 = SQLQuery( + query = """ + SELECT * FROM products + WHERE name LIKE '%laptop%' + """ +) + +client.searchAs[Product](query5) +``` + +--- + +**ORDER BY and LIMIT** + +```scala +// Sort by price ascending +val sortAsc = SQLQuery( + query = """ + SELECT * FROM products + WHERE category = 'electronics' + ORDER BY price ASC + """ +) + +client.searchAs[Product](sortAsc) match { + case ElasticSuccess(products) => + println("Products sorted by price (low to high):") + products.foreach(p => println(s"${p.name}: ${p.price}")) +} + +// Sort by price descending with limit +val sortDesc = SQLQuery( + query = """ + SELECT * FROM products + WHERE category = 'electronics' + ORDER BY price DESC + LIMIT 10 + """ +) + +client.searchAs[Product](sortDesc) match { + case ElasticSuccess(products) => + println("Top 10 most expensive electronics:") + products.foreach(p => println(s"${p.name}: ${p.price}")) +} + +// Multiple sort fields +val multiSort = SQLQuery( + query = """ + SELECT * FROM products + ORDER BY category ASC, price DESC + LIMIT 20 + """ +) + +client.searchAs[Product](multiSort) + +// Pagination with LIMIT and OFFSET +val paginated = SQLQuery( + query = """ + SELECT * FROM products + WHERE category = 'electronics' + ORDER BY price ASC + LIMIT 20 OFFSET 40 + """ +) + +client.searchAs[Product](paginated) // Returns page 3 (items 41-60) +``` + +--- + +**GROUP 
BY and Aggregations** + +For a full list of supported [aggregation functions](../sql/functions_aggregate.md), refer to the SQL documentation. + +```scala +// Count by category +val countByCategory = SQLQuery( + query = """ + SELECT category, COUNT(*) as total + FROM products + GROUP BY category + """ +) + +client.search(countByCategory) + +// Average price by category +val avgPriceByCategory = SQLQuery( + query = """ + SELECT category, AVG(price) as avg_price + FROM products + GROUP BY category + """ +) + +client.search(avgPriceByCategory) + +// Multiple aggregations +val multiAgg = SQLQuery( + query = """ + SELECT + category, + COUNT(*) as total_products, + AVG(price) as avg_price, + MIN(price) as min_price, + MAX(price) as max_price, + SUM(price) as total_value + FROM products + GROUP BY category + """ +) + +client.search(multiAgg) + +// Aggregation with filter +val filteredAgg = SQLQuery( + query = """ + SELECT category, AVG(price) as avg_price + FROM products + WHERE price > 100 + GROUP BY category + """ +) + +client.search(filteredAgg) +``` + +--- + +**JOIN-like Queries (Multi-Search)** + +```scala +// Search across multiple indices +val multiIndexQuery = SQLQuery( + query = """ + SELECT * FROM products WHERE user_id = 'user-123' + UNION ALL + SELECT * FROM orders WHERE user_id = 'user-123' + UNION ALL + SELECT * FROM reviews WHERE user_id = 'user-123' + """ +) + +client.search(multiIndexQuery) match { + case ElasticSuccess(response) => + println("✅ Retrieved user data from multiple indices") +} + +// Related data queries +val relatedData = SQLQuery( + query = """ + SELECT * FROM products WHERE category = 'electronics' + UNION ALL + SELECT * FROM products WHERE category = 'accessories' + """ +) + +client.searchAs[Product](relatedData) match { + case ElasticSuccess(products) => + println(s"✅ Found ${products.size} related products") +} +``` + +--- + +**Date Range Queries** + +For date functions, refer to the [SQL Date / Time / Datetime / Timestamp / Interval Functions](../sql/functions_date_time.md) documentation. 
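+
+For intuition, a predicate such as `createdAt >= CURRENT_DATE - INTERVAL 30 DAY` is expected to compile down to an Elasticsearch `range` query using date math; a rough, illustrative sketch of the generated JSON (the parser's actual output may differ):
+
+```scala
+// Illustrative only: a plausible Elasticsearch translation of the SQL predicate
+// "createdAt >= CURRENT_DATE - INTERVAL 30 DAY" using date math.
+val expectedRangeQuery: String =
+  """{"query":{"range":{"createdAt":{"gte":"now-30d/d"}}}}"""
+```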
+ +```scala +case class Order( + id: String, + userId: String, + total: Double, + status: String, + createdAt: String +) + +// Orders from last 30 days +val recentOrders = SQLQuery( + query = """ + SELECT * FROM orders + WHERE createdAt >= CURRENT_DATE - INTERVAL 30 DAY + AND status = 'completed' + ORDER BY createdAt DESC + """ +) + +client.searchAs[Order](recentOrders) + +// Orders in date range +val dateRange = SQLQuery( + query = """ + SELECT * FROM orders + WHERE createdAt BETWEEN '2024-01-01' AND '2024-01-31' + """ +) + +client.searchAs[Order](dateRange) + +// Orders from specific year +val yearQuery = SQLQuery( + query = """ + SELECT * FROM orders + WHERE EXTRACT(YEAR FROM createdAt) = 2024 + """ +) + +client.searchAs[Order](yearQuery) +``` + +--- + +**Async SQL Queries** + +```scala +import scala.concurrent.ExecutionContext.Implicits.global + +// Async SQL search +val asyncQuery = SQLQuery( + query = "SELECT * FROM products WHERE price > 500" +) + +client.searchAsyncAs[Product](asyncQuery).foreach { + case ElasticSuccess(products) => + println(s"✅ Found ${products.size} expensive products") + + case ElasticFailure(error) => + println(s"❌ Error: ${error.message}") +} + +// Parallel SQL queries +val queries = List( + SQLQuery("SELECT * FROM products WHERE category = 'electronics'"), + SQLQuery("SELECT * FROM products WHERE category = 'books'"), + SQLQuery("SELECT * FROM products WHERE category = 'clothing'") +) + +val futures = queries.map(query => client.searchAsyncAs[Product](query)) + +Future.sequence(futures).map { results => + results.foreach { + case ElasticSuccess(products) => + println(s"✅ Category: ${products.size} products") + case ElasticFailure(error) => + println(s"❌ Failed: ${error.message}") + } +} + +// Chained async SQL queries +val result = for { + products <- client.searchAsyncAs[Product](productQuery) + topProducts = products.getOrElse(Seq.empty).sortBy(-_.price).take(10) + orders <- client.searchAsyncAs[Order](orderQuery) +} yield (topProducts, orders) +``` + +--- + +## SQL Query Patterns + +### Full-Text Search with SQL + +For more details, refer to the [SQL Full-Text Search Function](../sql/functions_string.md#full-text-search-function) documentation. + +```scala +// Text search +val textSearch = SQLQuery( + query = """ + SELECT * FROM books + WHERE MATCH(title, abstract, preface, keywords) AGAINST ('machine learning') + """ +) + +client.searchAs[Book](textSearch) + +``` + +--- + +### Complex Filtering with SQL + +```scala +// Multiple conditions with precedence +val complexFilter = SQLQuery( + query = """ + SELECT * FROM products + WHERE (category = 'electronics' OR category = 'computers') + AND price BETWEEN 100 AND 1000 + AND stock > 0 + ORDER BY price ASC + """ +) + +client.searchAs[Product](complexFilter) + +// NOT conditions +val notQuery = SQLQuery( + query = """ + SELECT * FROM products + WHERE category = 'electronics' + AND NOT (brand = 'BrandX' OR brand = 'BrandY') + """ +) + +client.searchAs[Product](notQuery) + +// NULL checks +val nullCheck = SQLQuery( + query = """ + SELECT * FROM products + WHERE discount IS NOT NULL + AND discount > 0 + """ +) + +client.searchAs[Product](nullCheck) +``` + +--- + +## Best Practices for SQL Queries + +**1. 
Use SQL for Readable Queries**
+
+```scala
+// ✅ Good - SQL is clear and readable
+val sqlQuery = SQLQuery(
+  query = """
+    SELECT * FROM products
+    WHERE category = 'electronics'
+    AND price BETWEEN 100 AND 1000
+    ORDER BY price DESC
+    LIMIT 20
+  """
+)
+
+client.searchAs[Product](sqlQuery)
+
+// ❌ Avoid - Complex JSON DSL
+val jsonQuery = ElasticQuery(
+  query = """
+    {
+      "query": {
+        "bool": {
+          "must": [
+            {"term": {"category": "electronics"}},
+            {"range": {"price": {"gte": 100, "lte": 1000}}}
+          ]
+        }
+      },
+      "sort": [{"price": "desc"}],
+      "size": 20
+    }
+  """,
+  indices = Seq("products")
+)
+```
+
+---
+
+**2. Use Type-Safe Conversion**
+
+```scala
+// ✅ Good - Type-safe with case class
+case class Product(id: String, name: String, price: Double)
+
+client.searchAs[Product](sqlQuery) match {
+  case ElasticSuccess(products) => products.foreach(println)
+  case ElasticFailure(error) => println(error.message)
+}
+
+// ❌ Avoid - Raw response parsing
+client.search(sqlQuery) match {
+  case ElasticSuccess(response) =>
+    // Manual JSON parsing required
+  case ElasticFailure(error) =>
+    println(error.message)
+}
+```
+
+---
+
+**3. Use Parameterized Query Builders**
+
+`SQLQuery` wraps a raw SQL string, so interpolated values are not escaped automatically: validate or sanitize any user-supplied input before it reaches the query.
+
+```scala
+// ✅ Good - Query builder function (escape or validate user-supplied values)
+def findProductsByCategory(category: String, minPrice: Double): SQLQuery = {
+  SQLQuery(
+    query = s"""
+      SELECT * FROM products
+      WHERE category = '$category'
+      AND price >= $minPrice
+    """
+  )
+}
+
+client.searchAs[Product](findProductsByCategory("electronics", 100))
+
+// ✅ Good - Query builder class
+case class ProductQueryBuilder(
+  category: Option[String] = None,
+  minPrice: Option[Double] = None,
+  maxPrice: Option[Double] = None,
+  orderBy: Option[String] = None,
+  limit: Int = 100
+) {
+  def build: SQLQuery = {
+    val conditions = Seq(
+      category.map(c => s"category = '$c'"),
+      minPrice.map(p => s"price >= $p"),
+      maxPrice.map(p => s"price <= $p")
+    ).flatten
+
+    val whereClause = if (conditions.nonEmpty) {
+      s"WHERE ${conditions.mkString(" AND ")}"
+    } else ""
+
+    val orderClause = orderBy.map(field => s"ORDER BY $field").getOrElse("")
+
+    SQLQuery(
+      query = s"SELECT * FROM products $whereClause $orderClause LIMIT $limit"
+    )
+  }
+}
+
+// Usage
+val query = ProductQueryBuilder(
+  category = Some("electronics"),
+  minPrice = Some(100),
+  maxPrice = Some(1000),
+  orderBy = Some("price DESC"),
+  limit = 20
+).build
+
+client.searchAs[Product](query)
+```
+
+---
+
+**4. Use Async for Multiple Queries**
+
+```scala
+// ✅ Good - Parallel async SQL queries
+import scala.concurrent.ExecutionContext.Implicits.global
+
+val queries = List(
+  SQLQuery("SELECT * FROM products WHERE category = 'electronics'"),
+  SQLQuery("SELECT * FROM products WHERE category = 'books'"),
+  SQLQuery("SELECT * FROM products WHERE category = 'clothing'")
+)
+
+val futures = queries.map(query => client.searchAsyncAs[Product](query))
+
+Future.sequence(futures).map { results =>
+  results.foreach {
+    case ElasticSuccess(products) =>
+      println(s"✅ Found ${products.size} products")
+    case ElasticFailure(error) =>
+      println(s"❌ Failed: ${error.message}")
+  }
+}
+
+// ❌ Avoid - Sequential blocking queries
+val result1 = client.searchAs[Product](query1)
+val result2 = client.searchAs[Product](query2)
+val result3 = client.searchAs[Product](query3)
+```
+
+---
+
+**5. 
Use UNION ALL for Multi-Index Queries** + +```scala +// ✅ Good - Use UNION ALL for combining results from multiple queries +val unionQuery = SQLQuery( + query = """ + SELECT * FROM products WHERE category = 'electronics' + UNION ALL + SELECT * FROM products WHERE category = 'computers' + UNION ALL + SELECT * FROM products WHERE category = 'phones' + """ +) + +client.searchAs[Product](unionQuery) match { + case ElasticSuccess(products) => + println(s"✅ Found ${products.size} products across categories") + val byCategory = products.groupBy(_.category) + byCategory.foreach { case (category, items) => + println(s"$category: ${items.size} items") + } + + case ElasticFailure(error) => + println(s"❌ Multi-search failed: ${error.message}") +} + +// ❌ Avoid - Multiple separate queries +val electronics = client.searchAs[Product](SQLQuery("SELECT * FROM products WHERE category = 'electronics'", ...)) +val computers = client.searchAs[Product](SQLQuery("SELECT * FROM products WHERE category = 'computers'", ...)) +val phones = client.searchAs[Product](SQLQuery("SELECT * FROM products WHERE category = 'phones'", ...)) +``` + +--- + +**6. Use Scroll API for Large Result Sets** + +```scala +// ✅ Good - Use Scroll API for large datasets +// See Scroll API documentation for complete implementation + +val scrollQuery = SQLQuery( + query = "SELECT * FROM large_index WHERE category = 'electronics'" +) + +// Use dedicated Scroll API methods: +// - ScrollApi.scroll() for iteration +// - ScrollApi.scrollAs[T]() for type-safe iteration +// See Scroll API documentation + +// ❌ Avoid - Regular search for large datasets +// Don't use searchAs() for more than 10,000 documents +client.searchAs[Product](sqlQuery) // Limited to max 10,000 results +``` + +--- + +## Summary + +### Key Takeaways + +1. **Use SQL queries** as the primary search method (key feature) +2. **Use UNION ALL** for combining multiple queries efficiently +3. **Use type-safe conversion** with `searchAs[T]()` methods +4. **Use async methods** for better performance +5. **Use Scroll API** for large result sets (>10,000 documents) +6. **Parameterize queries** for reusability and safety +7. **Handle errors** with pattern matching on `ElasticResult` +8. 
**Test thoroughly** with different query patterns + +### Quick Reference - SQL Search + +```scala +import org.json4s.DefaultFormats + +implicit val formats: Formats = DefaultFormats + +// ============================================================ +// BASIC SQL SEARCH +// ============================================================ +case class Product(id: String, name: String, price: Double, category: String) + +val query = SQLQuery( + query = "SELECT * FROM products WHERE category = 'electronics' AND price > 100" +) + +client.searchAs[Product](query) match { + case ElasticSuccess(products) => products.foreach(println) + case ElasticFailure(error) => println(s"Error: ${error.message}") +} + +// ============================================================ +// SQL WITH ORDER BY AND LIMIT +// ============================================================ +val sortedQuery = SQLQuery( + query = """ + SELECT * FROM products + WHERE category = 'electronics' + ORDER BY price DESC + LIMIT 20 + """ +) + +client.searchAs[Product](sortedQuery) + +// ============================================================ +// SQL WITH AGGREGATIONS +// ============================================================ +val aggQuery = SQLQuery( + query = """ + SELECT category, AVG(price) as avg_price, COUNT(*) as total + FROM products + GROUP BY category + """ +) + +client.search(aggQuery) + +// ============================================================ +// UNION ALL FOR MULTI-SEARCH +// ============================================================ +val unionQuery = SQLQuery( + query = """ + SELECT * FROM products WHERE category = 'electronics' + UNION ALL + SELECT * FROM products WHERE category = 'books' + UNION ALL + SELECT * FROM products WHERE category = 'clothing' + """ +) + +client.searchAs[Product](unionQuery) + +// ============================================================ +// ASYNC SQL SEARCH +// ============================================================ +import scala.concurrent.ExecutionContext.Implicits.global + +client.searchAsyncAs[Product](query).foreach { + case ElasticSuccess(products) => println(s"Found ${products.size}") + case ElasticFailure(error) => println(s"Error: ${error.message}") +} + +// ============================================================ +// LARGE DATASETS - USE SCROLL API +// ============================================================ +// For >10,000 documents, use dedicated Scroll API +// See Scroll API documentation for complete examples +``` + +--- + +[Back to Index](README.md) | [Next: Scroll API](scroll.md) \ No newline at end of file diff --git a/documentation/client/settings.md b/documentation/client/settings.md new file mode 100644 index 00000000..aabbd57d --- /dev/null +++ b/documentation/client/settings.md @@ -0,0 +1,604 @@ +[Back to index](README.md) + +# SETTINGS API + +## Overview + +The **SettingsApi** trait provides functionality to manage Elasticsearch index settings dynamically, including refresh intervals, replica counts, and custom settings updates. + +**Features:** +- Dynamic settings updates with automatic index close/open +- Refresh interval toggling for bulk operations optimization +- Replica management for availability control +- Settings inspection and retrieval +- Comprehensive validation and error handling + +**Dependencies:** +- Requires `IndicesApi` for index lifecycle operations (close/open) + +--- + +## Public Methods + +### toggleRefresh + +Enables or disables the automatic refresh interval for an index. 
Disabling refresh improves bulk indexing performance. + +**Signature:** + +```scala +def toggleRefresh(index: String, enable: Boolean): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - Name of the index +- `enable` - `true` to enable refresh (1s interval), `false` to disable (-1) + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if settings updated successfully +- `ElasticFailure` with error details + +**Behavior:** +- When enabled: Sets refresh interval to "1s" (default behavior) +- When disabled: Sets refresh interval to "-1" (no automatic refresh) +- Automatically closes and reopens the index + +**Examples:** + +```scala +// Disable refresh before bulk indexing +client.toggleRefresh("products", enable = false) match { + case ElasticSuccess(true) => + println("Refresh disabled for bulk operation") + case ElasticFailure(e) => + println(s"Failed to disable refresh: ${e.message}") +} + +// Bulk indexing workflow +for { + _ <- client.toggleRefresh("products", enable = false) + _ <- client.bulkIndex(largeDataset) + _ <- client.toggleRefresh("products", enable = true) + _ <- client.refresh("products") +} yield "Bulk indexing complete" + +// Performance optimization pattern +def bulkIndexWithOptimization[T]( + index: String, + documents: Seq[T] +): ElasticResult[Unit] = { + for { + _ <- client.toggleRefresh(index, enable = false) + _ <- client.bulkIndex(documents) + _ <- client.toggleRefresh(index, enable = true) + _ <- client.refresh(index) + } yield () +} +``` + +**Performance Impact:** +- **Disabled:** 2-5x faster bulk indexing, documents not immediately searchable +- **Enabled:** Normal indexing speed, documents searchable within 1 second + +--- + +### setReplicas + +Updates the number of replica shards for an index. + +**Signature:** + +```scala +def setReplicas(index: String, replicas: Int): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - Name of the index +- `replicas` - Number of replica shards (0 or more) + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if updated successfully +- `ElasticFailure` with error details + +**Behavior:** +- Automatically closes and reopens the index +- Changes take effect immediately after reopening + +**Examples:** + +```scala +// Increase replicas for high availability +client.setReplicas("critical-data", 2) + +// Remove replicas for single-node cluster +client.setReplicas("dev-index", 0) + +// Dynamic replica management based on cluster size +def adjustReplicas(index: String, nodeCount: Int): ElasticResult[Boolean] = { + val optimalReplicas = Math.max(0, nodeCount - 1) + client.setReplicas(index, optimalReplicas) +} + +// Disaster recovery: increase replicas +for { + _ <- client.setReplicas("users", 3) + _ <- client.setReplicas("orders", 3) + _ <- client.setReplicas("products", 3) +} yield "Replicas increased for all indices" + +// Temporary replica reduction for maintenance +def maintenanceMode(index: String): ElasticResult[Unit] = { + for { + currentSettings <- client.loadSettings(index) + _ <- client.setReplicas(index, 0) + _ <- performMaintenance(index) + _ <- client.setReplicas(index, 2) // Restore + } yield () +} +``` + +**Replica Guidelines:** +- **0 replicas:** Development, single-node clusters (no redundancy) +- **1 replica:** Production minimum (survives 1 node failure) +- **2+ replicas:** High availability (survives multiple node failures) + +--- + +### updateSettings + +Updates arbitrary index settings with custom JSON configuration. 
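+
+Settings are passed as a raw JSON string. If you prefer to assemble that JSON programmatically instead of hand-writing it, the json4s DSL (already used by the client for serialization) works as well. A sketch, assuming the json4s-jackson module is on the classpath:
+
+```scala
+import org.json4s.JsonDSL._
+import org.json4s.jackson.JsonMethods.{compact, render}
+
+// Build the settings document with the json4s DSL rather than a raw string
+val settingsJson: String = compact(render(
+  "index" ->
+    (("refresh_interval" -> "30s") ~
+     ("number_of_replicas" -> 2))
+))
+
+client.updateSettings("products", settingsJson)
+```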
+ +**Signature:** + +```scala +def updateSettings( + index: String, + settings: String = defaultSettings +): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - Name of the index +- `settings` - JSON string with settings to update (defaults to `defaultSettings`) + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if updated successfully +- `ElasticFailure` with error details (400 for validation errors) + +**Validation:** +- Index name format validation +- JSON syntax validation + +**Behavior:** +- Closes the index before applying settings +- Opens the index after successful update +- If closing fails, settings are not applied +- If update fails, index remains closed + +**Examples:** + +```scala +// Update multiple settings at once +val settings = """ +{ + "index": { + "refresh_interval": "30s", + "number_of_replicas": 2, + "max_result_window": 20000 + } +} +""" +client.updateSettings("products", settings) + +// Update analysis settings +val analysisSettings = """ +{ + "index": { + "analysis": { + "analyzer": { + "custom_analyzer": { + "type": "custom", + "tokenizer": "standard", + "filter": ["lowercase", "stop"] + } + } + } + } +} +""" +client.updateSettings("search-index", analysisSettings) + +// Adjust performance settings +val performanceSettings = """ +{ + "index": { + "refresh_interval": "60s", + "translog.durability": "async", + "translog.sync_interval": "30s" + } +} +""" +client.updateSettings("logs", performanceSettings) + +// Error handling +client.updateSettings("my-index", invalidJson) match { + case ElasticSuccess(true) => + println("Settings updated") + case ElasticSuccess(false) => + println("Update failed (index may be closed)") + case ElasticFailure(error) if error.statusCode.contains(400) => + println(s"Validation error: ${error.message}") + case ElasticFailure(error) => + println(s"Update error: ${error.message}") +} + +// Monadic settings update +for { + exists <- client.indexExists("my-index") + updated <- if (exists) client.updateSettings("my-index", newSettings) + else ElasticResult.failure("Index does not exist") +} yield updated +``` + +**Common Settings:** + +```scala +// Refresh interval +"""{"index": {"refresh_interval": "30s"}}""" + +// Replicas +"""{"index": {"number_of_replicas": 2}}""" + +// Max result window +"""{"index": {"max_result_window": 50000}}""" + +// Translog settings +"""{"index": {"translog.durability": "async"}}""" + +// Merge settings +"""{"index": {"merge.scheduler.max_thread_count": 1}}""" +``` + +⚠️ **Warning:** Some settings require the index to be closed. The method automatically handles this but the index will be temporarily unavailable. + +--- + +### loadSettings + +Retrieves the current settings of an index as a JSON string. 
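+
+If you need typed access to a few specific settings rather than the raw JSON, the response can be extracted into a small case class; a sketch, assuming json4s and the response format shown below (note that Elasticsearch returns most setting values as strings, hence the `String` field):
+
+```scala
+import org.json4s.{DefaultFormats, Formats}
+import org.json4s.jackson.JsonMethods.parse
+
+implicit val formats: Formats = DefaultFormats
+
+// Hypothetical typed view of selected settings; field names mirror the keys
+// of the "index" settings object returned by loadSettings
+case class IndexSettings(number_of_replicas: String, refresh_interval: Option[String])
+
+def typedSettings(index: String): ElasticResult[IndexSettings] =
+  client.loadSettings(index).map(json => parse(json).extract[IndexSettings])
+```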
+ +**Signature:** + +```scala +def loadSettings(index: String): ElasticResult[String] +``` + +**Parameters:** +- `index` - Name of the index + +**Returns:** +- `ElasticSuccess[String]` containing index settings as JSON +- `ElasticFailure` with error details (400 for invalid index name, 404 if not found) + +**Behavior:** +- Retrieves full settings from Elasticsearch +- Extracts only the `index` settings object +- Validates JSON response structure + +**Examples:** + +```scala +// Load and inspect settings +client.loadSettings("products") match { + case ElasticSuccess(json) => + println(s"Current settings: $json") + case ElasticFailure(e) => + println(s"Failed to load: ${e.message}") +} + +// Parse settings for specific values +client.loadSettings("my-index").map { json => + val settings = parse(json) + val replicas = (settings \ "number_of_replicas").extract[Int] + val refreshInterval = (settings \ "refresh_interval").extract[String] + (replicas, refreshInterval) +} + +// Backup settings before modification +def updateWithBackup( + index: String, + newSettings: String +): ElasticResult[Boolean] = { + for { + backup <- client.loadSettings(index) + _ = saveToFile(s"$index-settings-backup.json", backup) + updated <- client.updateSettings(index, newSettings) + } yield updated +} + +// Compare settings across indices +val indices = List("index1", "index2", "index3") +val allSettings = indices.map { index => + index -> client.loadSettings(index) +} + +allSettings.foreach { + case (index, ElasticSuccess(settings)) => + println(s"$index: $settings") + case (index, ElasticFailure(e)) => + println(s"$index: Error - ${e.message}") +} + +// Extract specific setting +def getRefreshInterval(index: String): ElasticResult[String] = { + client.loadSettings(index).flatMap { json => + ElasticResult.attempt { + val settings = parse(json) + (settings \ "refresh_interval").extract[String] + } + } +} + +// Validate settings match expected configuration +def validateSettings( + index: String, + expectedReplicas: Int +): ElasticResult[Boolean] = { + client.loadSettings(index).map { json => + val settings = parse(json) + val actualReplicas = (settings \ "number_of_replicas").extract[Int] + actualReplicas == expectedReplicas + } +} +``` + +**Response Format:** + +The returned JSON contains index-level settings only: + +```scala +{ + "number_of_shards": "1", + "number_of_replicas": "1", + "refresh_interval": "1s", + "max_result_window": "10000", + "provided_name": "my-index", + "creation_date": "1699564800000", + "uuid": "abc123...", + "version": { + "created": "8110399" + } +} +``` + +--- + +## Implementation Requirements + +### executeUpdateSettings + +```scala +private[client] def executeUpdateSettings( + index: String, + settings: String +): ElasticResult[Boolean] +``` + +**Implementation Example:** + +```scala +private[client] def executeUpdateSettings( + index: String, + settings: String +): ElasticResult[Boolean] = { + executeRestAction[AcknowledgedResponse, Boolean]( + operation = "updateSettings", + index = Some(index) + )( + action = client.indices().putSettings( + new UpdateSettingsRequest(index).settings(settings, XContentType.JSON), + RequestOptions.DEFAULT + ) + )( + transformer = _.isAcknowledged + ) +} +``` + +--- + +### executeLoadSettings + +```scala +private[client] def executeLoadSettings(index: String): ElasticResult[String] +``` + +**Implementation Example:** + +```scala +private[client] def executeLoadSettings(index: String): ElasticResult[String] = { + executeRestAction[GetSettingsResponse, 
String]( + operation = "loadSettings", + index = Some(index) + )( + action = client.indices().getSettings( + new GetSettingsRequest().indices(index), + RequestOptions.DEFAULT + ) + )( + transformer = resp => { + val settings = resp.getIndexToSettings + settings.toString // Returns full JSON response + } + ) +} +``` + +--- + +## Error Handling + +**Invalid Index Name:** + +```scala +client.updateSettings("", newSettings) match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.operation.contains("updateSettings")) +} +``` + +**Invalid JSON Settings:** + +```scala +client.updateSettings("my-index", "{ invalid }") match { + case ElasticFailure(error) => + assert(error.message.contains("Invalid settings")) +} +``` + +**Index Not Found:** + +```scala +client.loadSettings("non-existent") match { + case ElasticFailure(error) => + assert(error.message.contains("not found")) +} +``` + +**Failed to Close Index:** + +```scala +// If index cannot be closed, settings update is aborted +client.updateSettings("locked-index", settings) match { + case ElasticFailure(error) => + println(s"Cannot update: ${error.message}") + // Index remains in original state +} +``` + +--- + +## Best Practices + +**Bulk Indexing Optimization:** + +```scala +def optimizedBulkIndex[T]( + index: String, + documents: Seq[T] +): ElasticResult[Unit] = { + for { + // Disable refresh + _ <- client.toggleRefresh(index, enable = false) + + // Reduce replicas temporarily + _ <- client.setReplicas(index, 0) + + // Perform bulk indexing + _ <- client.bulkIndex(documents) + + // Restore replicas + _ <- client.setReplicas(index, 1) + + // Re-enable refresh + _ <- client.toggleRefresh(index, enable = true) + + // Force refresh + _ <- client.refresh(index) + } yield () +} +``` + +**Settings Backup and Restore:** + +```scala +case class SettingsBackup(index: String, settings: String, timestamp: Long) + +def backupSettings(index: String): ElasticResult[SettingsBackup] = { + client.loadSettings(index).map { settings => + SettingsBackup(index, settings, System.currentTimeMillis()) + } +} + +def restoreSettings(backup: SettingsBackup): ElasticResult[Boolean] = { + client.updateSettings(backup.index, backup.settings) +} +``` + +**Gradual Settings Changes:** + +```scala +// Gradually increase replicas across cluster +def scaleReplicas(indices: List[String], targetReplicas: Int): Unit = { + indices.foreach { index => + client.setReplicas(index, targetReplicas) + Thread.sleep(5000) // Allow cluster to rebalance + } +} +``` + +--- + +## Workflow Examples + +**Complete Index Reconfiguration:** + +```scala +def reconfigureIndex(index: String): ElasticResult[Unit] = { + for { + // Backup current settings + backup <- client.loadSettings(index) + _ = println(s"Backed up settings: $backup") + + // Apply new settings + newSettings = """ + { + "index": { + "refresh_interval": "30s", + "number_of_replicas": 2, + "max_result_window": 20000 + } + } + """ + _ <- client.updateSettings(index, newSettings) + + // Verify changes + updated <- client.loadSettings(index) + _ = println(s"New settings: $updated") + } yield () +} +``` + +**Performance Tuning:** + +```scala +def tuneForWrites(index: String): ElasticResult[Unit] = { + val writeOptimized = """ + { + "index": { + "refresh_interval": "30s", + "number_of_replicas": 0, + "translog.durability": "async" + } + } + """ + client.updateSettings(index, writeOptimized).map(_ => ()) +} + +def tuneForReads(index: String): ElasticResult[Unit] = { + val readOptimized = """ + { + 
"index": { + "refresh_interval": "1s", + "number_of_replicas": 2 + } + } + """ + client.updateSettings(index, readOptimized).map(_ => ()) +} +``` + +--- + +[Back to index](README.md) | [Next: Alias Management](aliases.md) \ No newline at end of file diff --git a/documentation/client/update.md b/documentation/client/update.md new file mode 100644 index 00000000..a2df2cf6 --- /dev/null +++ b/documentation/client/update.md @@ -0,0 +1,981 @@ +[Back to index](README.md) + +# UPDATE API + +## Overview + +The **UpdateApi** trait provides functionality to update documents in Elasticsearch, supporting both synchronous and asynchronous operations with automatic serialization and upsert capabilities. + +**Features:** +- Synchronous and asynchronous updates +- Automatic JSON serialization from Scala objects +- Upsert support (insert if document doesn't exist) +- Type-safe updates with implicit serialization +- Automatic index refresh after updates +- Comprehensive validation and error handling +- Partial document updates + +**Dependencies:** +- Requires `RefreshApi` for automatic refresh after updates +- Requires `SerializationApi` for JSON serialization + +--- + +## Understanding Updates + +**Update vs Index:** +- **Update:** Modifies existing document fields (partial update) +- **Index:** Replaces entire document (full replacement) + +**Upsert Behavior:** +- `upsert = true`: Creates document if it doesn't exist +- `upsert = false`: Fails if document doesn't exist + +**Update Process:** +1. Retrieves current document +2. Applies changes +3. Re-indexes modified document +4. Optionally refreshes index + +--- + +## Public Methods + +### update + +Updates a document in Elasticsearch using a raw JSON string. + +**Signature:** + +```scala +def update( + index: String, + id: String, + source: String, + upsert: Boolean +): ElasticResult[Boolean] +``` + +**Parameters:** +- `index` - The index name +- `id` - The document ID to update +- `source` - The update data as JSON (partial or full document) +- `upsert` - Whether to create document if it doesn't exist + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if updated successfully +- `ElasticFailure` with error details (400 for validation errors, 404 if document not found and upsert=false) + +**Validation:** +- Index name format validation +- JSON syntax validation + +**Behavior:** +- Automatically refreshes index after successful update +- Returns failure if document doesn't exist and upsert=false + +**Examples:** + +```scala +// Basic update with upsert +val updateJson = """ +{ + "price": 899.99, + "updated_at": "2024-01-15T10:30:00Z" +} +""" +client.update("products", "prod-001", updateJson, upsert = true) match { + case ElasticSuccess(true) => println("Document updated") + case ElasticFailure(e) => println(s"Error: ${e.message}") +} + +// Update without upsert (fails if document doesn't exist) +val partialUpdate = """ +{ + "status": "shipped", + "tracking_number": "TRK123456" +} +""" +client.update("orders", "order-001", partialUpdate, upsert = false) + +// Full document replacement via update +val fullDoc = """ +{ + "name": "Updated Product", + "price": 999.99, + "category": "Electronics", + "tags": ["new", "featured"], + "updated_at": "2024-01-15T10:30:00Z" +} +""" +client.update("products", "prod-001", fullDoc, upsert = true) + +// Increment counter +val incrementJson = """ +{ + "views": 150, + "last_viewed": "2024-01-15T10:30:00Z" +} +""" +client.update("analytics", "page-001", incrementJson, upsert = true) + +// Update nested fields +val 
nestedUpdate = """ +{ + "user": { + "email": "newemail@example.com", + "verified": true + } +} +""" +client.update("users", "user-123", nestedUpdate, upsert = false) + +// Conditional update with validation +def updateIfValid( + index: String, + id: String, + json: String +): ElasticResult[Boolean] = { + for { + _ <- validateUpdateData(json) + result <- client.update(index, id, json, upsert = false) + } yield result +} + +// Update with retry on failure +def updateWithRetry( + index: String, + id: String, + json: String, + maxRetries: Int = 3 +): ElasticResult[Boolean] = { + def attempt(remaining: Int): ElasticResult[Boolean] = { + client.update(index, id, json, upsert = true) match { + case success @ ElasticSuccess(true) => success + case failure if remaining > 0 => + Thread.sleep(1000) + attempt(remaining - 1) + case failure => failure + } + } + attempt(maxRetries) +} + +// Batch updates +val updates = List( + ("prod-001", """{"price": 899.99}"""), + ("prod-002", """{"price": 699.99}"""), + ("prod-003", """{"price": 499.99}""") +) + +updates.foreach { case (id, json) => + client.update("products", id, json, upsert = true) +} + +// Error handling +client.update("products", "prod-999", updateJson, upsert = false) match { + case ElasticSuccess(true) => + println("✅ Document updated") + case ElasticFailure(error) if error.message.contains("not updated") => + println("❌ Document not found") + case ElasticFailure(error) => + println(s"❌ Update failed: ${error.message}") +} +``` + +--- + +### updateAs + +Updates a Scala object in Elasticsearch with automatic JSON serialization. + +**Signature:** + +```scala +def updateAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String] = None, + maybeType: Option[String] = None, + upsert: Boolean = true +)(implicit u: ClassTag[U], formats: Formats): ElasticResult[Boolean] +``` + +**Parameters:** +- `entity` - The Scala object containing update data +- `id` - The document ID to update +- `index` - Optional index name (defaults to entity type name) +- `maybeType` - Optional type name (defaults to class name in lowercase) +- `upsert` - Whether to create document if it doesn't exist (default: true) +- `u` - Implicit ClassTag for type information +- `formats` - Implicit JSON serialization formats + +**Returns:** +- `ElasticSuccess[Boolean]` with `true` if updated successfully +- `ElasticFailure` with error details + +**Examples:** + +```scala +// Domain models +case class Product(name: String, price: Double, category: String) +case class ProductUpdate(price: Double, updatedAt: String) +case class User(username: String, email: String, verified: Boolean) + +implicit val formats: Formats = DefaultFormats + +// Basic update with upsert +val productUpdate = ProductUpdate(899.99, "2024-01-15T10:30:00Z") +client.updateAs(productUpdate, id = "prod-001") match { + case ElasticSuccess(true) => println("Product updated") + case ElasticFailure(e) => println(s"Error: ${e.message}") +} + +// Update with explicit index +val user = User("john_doe", "newemail@example.com", verified = true) +client.updateAs( + entity = user, + id = "user-123", + index = Some("users-v2"), + upsert = true +) + +// Update without upsert (strict update) +client.updateAs( + entity = productUpdate, + id = "prod-001", + index = Some("products"), + upsert = false +) match { + case ElasticSuccess(true) => println("✅ Existing document updated") + case ElasticFailure(e) => println(s"❌ Document not found: ${e.message}") +} + +// Partial update model +case class StatusUpdate(status: String, 
updatedBy: String, timestamp: Long) + +val statusUpdate = StatusUpdate( + status = "completed", + updatedBy = "admin", + timestamp = System.currentTimeMillis() +) +client.updateAs(statusUpdate, id = "task-001", index = Some("tasks")) + +// Update multiple documents +val updates = List( + ("prod-001", ProductUpdate(899.99, "2024-01-15")), + ("prod-002", ProductUpdate(699.99, "2024-01-15")), + ("prod-003", ProductUpdate(499.99, "2024-01-15")) +) + +updates.foreach { case (id, update) => + client.updateAs(update, id, index = Some("products")) +} + +// Conditional update based on current state +def updateIfChanged[T <: AnyRef]( + entity: T, + id: String +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[Boolean] = { + for { + current <- client.get(id, indexNameFor[T]) + needsUpdate = hasChanged(current, entity) + result <- if (needsUpdate) { + client.updateAs(entity, id, upsert = false) + } else { + ElasticResult.success(false) + } + } yield result +} + +// Type-safe update with validation +def validateAndUpdate[T <: AnyRef]( + entity: T, + id: String +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[Boolean] = { + for { + validated <- validate(entity) + updated <- client.updateAs(validated, id, upsert = true) + } yield updated +} + +// Monadic composition +def updateWithAudit[T <: AnyRef]( + entity: T, + id: String, + userId: String +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[Boolean] = { + for { + updated <- client.updateAs(entity, id) + _ <- logAuditTrail(id, userId, "update") + } yield updated +} + +// Error handling +client.updateAs(productUpdate, "prod-001", upsert = false) match { + case ElasticSuccess(true) => + println("✅ Document updated successfully") + case ElasticFailure(error) if error.message.contains("not updated") => + println("❌ Document does not exist") + case ElasticFailure(error) => + println(s"❌ Update failed: ${error.message}") + error.cause.foreach(ex => println(s"Cause: ${ex.getMessage}")) +} +``` + +--- + +### updateAsync + +Asynchronously updates a document using a raw JSON string. 
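+
+Note that Elasticsearch-level errors are reported as a successfully completed `Future` carrying an `ElasticFailure` (the underlying listener completes the promise with the failure). If a caller prefers plain `Future` semantics, so that standard combinators like `recover` apply, a small adapter can fold the result. A sketch, not part of the API (`updateAsyncOrFail` is a hypothetical helper):
+
+```scala
+import scala.concurrent.{ExecutionContext, Future}
+
+// Folds ElasticResult into Future semantics: ElasticFailure becomes a failed Future
+def updateAsyncOrFail(index: String, id: String, json: String)(
+    implicit ec: ExecutionContext
+): Future[Boolean] =
+  client.updateAsync(index, id, json, upsert = true).flatMap {
+    case ElasticSuccess(acknowledged) => Future.successful(acknowledged)
+    case ElasticFailure(error)        => Future.failed(new RuntimeException(error.message))
+  }
+```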
+ +**Signature:** + +```scala +def updateAsync( + index: String, + id: String, + source: String, + upsert: Boolean +)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] +``` + +**Parameters:** +- `index` - The index name +- `id` - The document ID to update +- `source` - The update data as JSON +- `upsert` - Whether to create document if it doesn't exist +- `ec` - Implicit ExecutionContext + +**Returns:** +- `Future[ElasticResult[Boolean]]` that completes when update finishes + +**Validation:** +- Index name and JSON validation performed synchronously before async execution + +**Examples:** + +```scala +import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.Future + +// Basic async update +val updateJson = """{"price": 899.99, "updated_at": "2024-01-15"}""" +val future = client.updateAsync("products", "prod-001", updateJson, upsert = true) + +future.onComplete { + case Success(ElasticSuccess(true)) => + println("✅ Document updated") + case Success(ElasticFailure(error)) => + println(s"❌ Error: ${error.message}") + case Failure(ex) => + println(s"❌ Future failed: ${ex.getMessage}") +} + +// Batch async updates +val updates = List( + ("prod-001", """{"price": 899.99}"""), + ("prod-002", """{"price": 699.99}"""), + ("prod-003", """{"price": 499.99}""") +) + +val futures = updates.map { case (id, json) => + client.updateAsync("products", id, json, upsert = true) +} + +Future.sequence(futures).map { results => + val successCount = results.count { + case ElasticSuccess(true) => true + case _ => false + } + println(s"Updated $successCount/${results.length} documents") +} + +// Chained async operations +def fetchUpdateAndSave(id: String): Future[ElasticResult[Boolean]] = { + for { + data <- fetchFromExternalAPI(id) + json = transformToJson(data) + result <- client.updateAsync("products", id, json, upsert = true) + } yield result +} + +// Parallel updates with rate limiting +def updateWithRateLimit( + updates: List[(String, String)], + maxConcurrent: Int = 10 +): Future[List[ElasticResult[Boolean]]] = { + updates.grouped(maxConcurrent).foldLeft(Future.successful(List.empty[ElasticResult[Boolean]])) { + case (accFuture, batch) => + accFuture.flatMap { acc => + val batchFutures = batch.map { case (id, json) => + client.updateAsync("products", id, json, upsert = true) + } + Future.sequence(batchFutures).map(acc ++ _) + } + } +} + +// Error recovery +client.updateAsync("products", "prod-001", updateJson, upsert = true) + .recover { + case ex: Exception => + ElasticFailure(ElasticError( + message = s"Update failed: ${ex.getMessage}", + cause = Some(ex) + )) + } + .foreach { + case ElasticSuccess(true) => println("Success") + case ElasticFailure(e) => println(s"Failed: ${e.message}") + } + +// Conditional async update +def updateIfExists( + index: String, + id: String, + json: String +): Future[ElasticResult[Boolean]] = { + client.existsAsync(index, id).flatMap { + case ElasticSuccess(true) => + client.updateAsync(index, id, json, upsert = false) + case ElasticSuccess(false) => + Future.successful(ElasticFailure(ElasticError( + message = s"Document $id does not exist" + ))) + case failure @ ElasticFailure(_) => + Future.successful(failure) + } +} + +// Streaming updates +import akka.stream.scaladsl._ + +def streamUpdates( + index: String, + source: Source[(String, String), _] +): Future[Int] = { + source + .mapAsync(parallelism = 10) { case (id, json) => + client.updateAsync(index, id, json, upsert = true) + } + .runFold(0) { (count, result) => + result match { + 
case ElasticSuccess(true) => count + 1 + case _ => count + } + } +} +``` + +--- + +### updateAsyncAs + +Asynchronously updates a Scala object with automatic JSON serialization. + +**Signature:** + +```scala +def updateAsyncAs[U <: AnyRef]( + entity: U, + id: String, + index: Option[String] = None, + maybeType: Option[String] = None, + upsert: Boolean = true +)(implicit + u: ClassTag[U], + ec: ExecutionContext, + formats: Formats +): Future[ElasticResult[Boolean]] +``` + +**Parameters:** +- `entity` - The Scala object containing update data +- `id` - The document ID to update +- `index` - Optional index name +- `maybeType` - Optional type name +- `upsert` - Whether to create if doesn't exist (default: true) +- `u` - Implicit ClassTag +- `ec` - Implicit ExecutionContext +- `formats` - Implicit JSON serialization formats + +**Returns:** +- `Future[ElasticResult[Boolean]]` that completes when update finishes + +**Examples:** + +```scala +import scala.concurrent.ExecutionContext.Implicits.global + +case class ProductUpdate(price: Double, updatedAt: String) +implicit val formats: Formats = DefaultFormats + +// Basic async update +val update = ProductUpdate(899.99, "2024-01-15") +val future = client.updateAsyncAs(update, id = "prod-001") + +future.onComplete { + case Success(ElasticSuccess(true)) => + println("✅ Product updated") + case Success(ElasticFailure(error)) => + println(s"❌ Error: ${error.message}") + case Failure(ex) => + println(s"❌ Future failed: ${ex.getMessage}") +} + +// Batch async updates +val updates = List( + ("prod-001", ProductUpdate(899.99, "2024-01-15")), + ("prod-002", ProductUpdate(699.99, "2024-01-15")), + ("prod-003", ProductUpdate(499.99, "2024-01-15")) +) + +val futures = updates.map { case (id, update) => + client.updateAsyncAs(update, id, index = Some("products")) +} + +Future.sequence(futures).map { results => + val successful = results.count { + case ElasticSuccess(true) => true + case _ => false + } + println(s"✅ Updated $successful/${results.length} documents") +} + +// Non-blocking pipeline +def processAndUpdate[T <: AnyRef]( + data: List[(String, RawData)] +)(implicit ct: ClassTag[T], ec: ExecutionContext, formats: Formats): Future[Int] = { + val processedFuture = Future { + data.map { case (id, raw) => (id, transform(raw)) } + } + + processedFuture.flatMap { processed => + val updateFutures = processed.map { case (id, entity) => + client.updateAsyncAs(entity, id, upsert = true) + } + + Future.sequence(updateFutures).map { results => + results.count { + case ElasticSuccess(true) => true + case _ => false + } + } + } +} + +// Error recovery with retry +def updateWithRetry[T <: AnyRef]( + entity: T, + id: String, + maxRetries: Int = 3 +)(implicit + ct: ClassTag[T], + ec: ExecutionContext, + formats: Formats +): Future[ElasticResult[Boolean]] = { + + def attempt(remaining: Int): Future[ElasticResult[Boolean]] = { + client.updateAsyncAs(entity, id, upsert = true).flatMap { + case success @ ElasticSuccess(true) => Future.successful(success) + case failure if remaining > 0 => + Future { + Thread.sleep(1000) + }.flatMap(_ => attempt(remaining - 1)) + case failure => Future.successful(failure) + } + } + + attempt(maxRetries) +} + +// Parallel updates with error collection +def updateAllWithErrors[T <: AnyRef]( + updates: List[(String, T)] +)(implicit + ct: ClassTag[T], + ec: ExecutionContext, + formats: Formats +): Future[(List[String], List[(String, ElasticError)])] = { + + val futures = updates.map { case (id, entity) => + client.updateAsyncAs(entity, id, upsert = 
true).map(result => (id, result)) + } + + Future.sequence(futures).map { results => + val (successes, failures) = results.partition { + case (_, ElasticSuccess(true)) => true + case _ => false + } + + val successIds = successes.map(_._1) + val failureDetails = failures.collect { + case (id, ElasticFailure(error)) => (id, error) + } + + (successIds, failureDetails) + } +} + +// Await result (for testing) +import scala.concurrent.Await +import scala.concurrent.duration._ + +val result = Await.result( + client.updateAsyncAs(update, id = "prod-001"), + 5.seconds +) +``` + +--- + +## Implementation Requirements + +### executeUpdate + +```scala +private[client] def executeUpdate( + index: String, + id: String, + source: String, + upsert: Boolean +): ElasticResult[Boolean] +``` + +**Implementation Example:** + +```scala +private[client] def executeUpdate( + index: String, + id: String, + source: String, + upsert: Boolean +): ElasticResult[Boolean] = { + executeRestAction[UpdateResponse, Boolean]( + operation = "update", + index = Some(index) + )( + action = { + val request = new UpdateRequest(index, id) + .doc(source, XContentType.JSON) + + if (upsert) { + request.docAsUpsert(true) + } + + client.update(request, RequestOptions.DEFAULT) + } + )( + transformer = resp => { + resp.getResult == DocWriteResponse.Result.UPDATED || + resp.getResult == DocWriteResponse.Result.CREATED + } + ) +} +``` + +--- + +### executeUpdateAsync + +```scala +private[client] def executeUpdateAsync( + index: String, + id: String, + source: String, + upsert: Boolean +)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] +``` + +**Implementation Example:** + +```scala +private[client] def executeUpdateAsync( + index: String, + id: String, + source: String, + upsert: Boolean +)(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = { + val promise = Promise[ElasticResult[Boolean]]() + + val request = new UpdateRequest(index, id) + .doc(source, XContentType.JSON) + + if (upsert) { + request.docAsUpsert(true) + } + + client.updateAsync( + request, + RequestOptions.DEFAULT, + new ActionListener[UpdateResponse] { + override def onResponse(response: UpdateResponse): Unit = { + val success = response.getResult == DocWriteResponse.Result.UPDATED || + response.getResult == DocWriteResponse.Result.CREATED + promise.success(ElasticSuccess(success)) + } + + override def onFailure(e: Exception): Unit = { + promise.success(ElasticFailure(ElasticError( + message = s"Async update failed: ${e.getMessage}", + operation = Some("updateAsync"), + index = Some(index), + cause = Some(e) + ))) + } + } + ) + + promise.future +} +``` + +--- + +## Common Patterns + +### Repository Pattern with Updates + +```scala +trait Repository[T <: AnyRef] { + def update(entity: T, id: String, createIfMissing: Boolean = true)(implicit + ct: ClassTag[T], + formats: Formats, + client: ElasticClient + ): ElasticResult[Boolean] = { + client.updateAs(entity, id, upsert = createIfMissing) + } + + def updateAsync(entity: T, id: String, createIfMissing: Boolean = true)(implicit + ct: ClassTag[T], + formats: Formats, + ec: ExecutionContext, + client: ElasticClient + ): Future[ElasticResult[Boolean]] = { + client.updateAsyncAs(entity, id, upsert = createIfMissing) + } +} + +case class Product(name: String, price: Double, stock: Int) + +object ProductRepository extends Repository[Product] { + implicit val formats: Formats = DefaultFormats + + def updatePrice(id: String, newPrice: Double)(implicit + client: ElasticClient + ): ElasticResult[Boolean] = { 
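+    // Partial update: only the price field is sent, so other fields on the document are preserved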
+ val json = s"""{"price": $newPrice}""" + client.update("products", id, json, upsert = false) + } + + def updateStock(id: String, quantity: Int)(implicit + client: ElasticClient + ): ElasticResult[Boolean] = { + val json = s"""{"stock": $quantity}""" + client.update("products", id, json, upsert = false) + } +} +``` + +### Partial Update Pattern + +```scala +// Separate update models from domain models +case class Product(name: String, price: Double, category: String, stock: Int) + +case class PriceUpdate(price: Double, updatedAt: String) +case class StockUpdate(stock: Int, updatedBy: String) +case class StatusUpdate(status: String, timestamp: Long) + +def updatePrice(id: String, newPrice: Double): ElasticResult[Boolean] = { + val update = PriceUpdate(newPrice, java.time.Instant.now().toString) + client.updateAs(update, id, index = Some("products"), upsert = false) +} + +def updateStock(id: String, quantity: Int, user: String): ElasticResult[Boolean] = { + val update = StockUpdate(quantity, user) + client.updateAs(update, id, index = Some("products"), upsert = false) +} +``` + +### Optimistic Locking + +```scala +case class VersionedDocument[T](data: T, version: Long) + +def updateWithVersion[T <: AnyRef]( + entity: T, + id: String, + expectedVersion: Long +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[Boolean] = { + for { + current <- client.get(id, indexNameFor[T]) + currentVersion = extractVersion(current) + _ <- if (currentVersion == expectedVersion) { + ElasticResult.success(()) + } else { + ElasticResult.failure(s"Version mismatch: expected $expectedVersion, got $currentVersion") + } + updated <- client.updateAs(entity, id, upsert = false) + } yield updated +} +``` + +### Conditional Update + +```scala +def updateIfCondition[T <: AnyRef]( + entity: T, + id: String, + condition: T => Boolean +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[Boolean] = { + for { + current <- client.getAs[T](id) + shouldUpdate = condition(current) + result <- if (shouldUpdate) { + client.updateAs(entity, id, upsert = false) + } else { + ElasticResult.success(false) + } + } yield result +} + +// Usage +updateIfCondition(productUpdate, "prod-001", (p: Product) => p.stock > 0) +``` + +--- + +## Performance Optimization + +### Batch Updates with Disabled Refresh + +```scala +def batchUpdateOptimized[T <: AnyRef]( + updates: List[(String, T)], + indexName: String +)(implicit ct: ClassTag[T], formats: Formats): ElasticResult[Unit] = { + for { + // Disable refresh + _ <- client.toggleRefresh(indexName, enable = false) + + // Update all documents + _ <- updates.foldLeft(ElasticResult.success(())) { case (acc, (id, entity)) => + acc.flatMap(_ => client.updateAs(entity, id, Some(indexName), upsert = true).map(_ => ())) + } + + // Re-enable refresh + _ <- client.toggleRefresh(indexName, enable = true) + + // Manual refresh + _ <- client.refresh(indexName) + } yield () +} +``` + +### Parallel Async Updates + +```scala +def parallelUpdate[T <: AnyRef]( + updates: List[(String, T)], + parallelism: Int = 10 +)(implicit + ct: ClassTag[T], + ec: ExecutionContext, + formats: Formats +): Future[List[ElasticResult[Boolean]]] = { + + updates + .grouped(parallelism) + .foldLeft(Future.successful(List.empty[ElasticResult[Boolean]])) { + case (accFuture, batch) => + accFuture.flatMap { acc => + val batchFutures = batch.map { case (id, entity) => + client.updateAsyncAs(entity, id, upsert = true) + } + Future.sequence(batchFutures).map(acc ++ _) + } + } +} +``` + +--- + +## Error Handling + +**Document 
Not Found (upsert=false):** + +```scala +client.update("products", "non-existent", updateJson, upsert = false) match { + case ElasticFailure(error) if error.message.contains("not updated") => + println("❌ Document does not exist") + case ElasticFailure(error) => + println(s"❌ Other error: ${error.message}") +} +``` + +**Invalid JSON:** + +```scala +client.update("products", "prod-001", "{ invalid }", upsert = true) match { + case ElasticFailure(error) => + assert(error.statusCode.contains(400)) + assert(error.message.contains("Invalid JSON")) +} +``` + +**Serialization Failure:** + +```scala +case class InvalidEntity(data: java.io.InputStream) // Not serializable + +client.updateAs(InvalidEntity(null), "doc-1", upsert = true) match { + case ElasticFailure(error) => + println(s"Serialization failed: ${error.message}") +} +``` + +--- + +## Best Practices + +**1. Use Partial Update Models** + +```scala +// ✅ Good - explicit update model +case class PriceUpdate(price: Double) +client.updateAs(PriceUpdate(899.99), "prod-001") + +// ❌ Avoid - full domain model for partial updates +case class Product(name: String, price: Double, category: String, stock: Int) +client.updateAs(Product("", 899.99, "", 0), "prod-001") // Overwrites other fields +``` + +**2. Choose Appropriate Upsert Behavior** + +```scala +// ✅ Use upsert=true for idempotent operations +client.updateAs(entity, id, upsert = true) + +// ✅ Use upsert=false when document must exist +client.updateAs(entity, id, upsert = false) +``` + +**3. Handle Document Not Found Gracefully** + +```scala +def safeUpdate[T <: AnyRef](entity: T, id: String)(implicit + ct: ClassTag[T], + formats: Formats +): ElasticResult[Boolean] = { + client.updateAs(entity, id, upsert = false).recoverWith { + case error if error.message.contains("not updated") => + // ... + } +} +``` + +--- + +[Back to index](README.md) | [Next: Delete Documents](delete.md) \ No newline at end of file diff --git a/documentation/client/version.md b/documentation/client/version.md new file mode 100644 index 00000000..9b9c6db7 --- /dev/null +++ b/documentation/client/version.md @@ -0,0 +1,130 @@ +[Back to index](README.md) + +# VERSION API + +## Overview + +The **VersionApi** trait provides functionality to retrieve the Elasticsearch cluster version with automatic caching to minimize network overhead. + +**Dependencies:** Requires `SerializationApi` for JSON response parsing. + +--- + +## Public Methods + +### version + +Retrieves the Elasticsearch cluster version. Results are cached after the first successful call. 
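+
+One caveat: version strings compare lexicographically in Scala, so `"10.0.0" < "9.0.0"`. For minimum-version checks, prefer comparing numeric components; a small helper sketch (`versionAtLeast` is a hypothetical name):
+
+```scala
+// Compares dotted version strings numerically, so "10.0.0" ranks above "9.2.1";
+// non-numeric suffixes such as "-SNAPSHOT" are ignored
+def versionAtLeast(current: String, min: String): Boolean = {
+  def parts(v: String): Array[Int] =
+    v.split("\\.").map(_.takeWhile(_.isDigit)).filter(_.nonEmpty).map(_.toInt)
+  parts(current).zipAll(parts(min), 0, 0)
+    .find { case (c, m) => c != m }
+    .forall { case (c, m) => c > m }
+}
+
+// Usage with the cached version (point-in-time reads need ES 7.10+)
+val supportsPit: ElasticResult[Boolean] = client.version.map(versionAtLeast(_, "7.10.0"))
+```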
+ +**Signature:** + +```scala +def version: ElasticResult[String] +``` + +**Returns:** +- `ElasticSuccess[String]` containing the version string (e.g., "7.17.0", "8.11.3") +- `ElasticFailure` with error details if retrieval fails + +**Behavior:** +- Returns cached version immediately if available +- On first call, executes `executeVersion()` and caches successful results +- Failures are NOT cached (allows retry on next call) +- Logs success with ✅ and failures with ❌ + +**Examples:** + +```scala +// Basic usage +val result = client.version +result match { + case ElasticSuccess(v) => println(s"Running ES $v") + case ElasticFailure(e) => println(s"Error: ${e.message}") +} + +// Monadic operations +val majorVersion = client.version.map(_.split("\\.").head.toInt) + +// Version-based logic +client.version.foreach { v => + if (v.startsWith("8.")) { + // Use ES 8.x specific features + } +} + +// Compatibility check +def requiresMinVersion(min: String): ElasticResult[Unit] = { + client.version.flatMap { current => + if (current >= min) ElasticResult.success(()) + else ElasticResult.failure(s"Requires ES $min+, found $current") + } +} +``` + +--- + +## Implementation Requirements + +### executeVersion + +Must be implemented by each client-specific trait. + +**Signature:** + +```scala +private[client] def executeVersion(): ElasticResult[String] +``` + +**Implementation Examples:** + +**Java Client (ES 8-9):** + +```scala +private[client] def executeVersion(): ElasticResult[String] = { + executeJavaAction[InfoResponse, String]( + operation = "version" + )( + action = client.info() + )( + transformer = _.version().number() + ) +} +``` + +**REST High Level Client (ES 6-7):** + +```scala +private[client] def executeVersion(): ElasticResult[String] = { + executeRestLowLevelAction[String]( + operation = "version" + )( + request = new Request("GET", "/") + )( + transformer = { resp => + val json = parse(EntityUtils.toString(resp.getEntity)) + (json \\ "version" \\ "number").extract[String] + } + ) +} +``` + +**Jest Client (ES 5-6):** + +```scala +private[client] def executeVersion(): ElasticResult[String] = { + executeJestAction[JestResult, String]( + operation = "version" + )( + action = new Info.Builder().build() + )( + transformer = { result => + val json = parse(result.getJsonString) + (json \\ "version").extract[String] + } + ) +} +``` + +--- + +[Back to index](README.md) | [Next: Flush Index](flush.md) \ No newline at end of file diff --git a/documentation/functions_aggregate.md b/documentation/functions_aggregate.md deleted file mode 100644 index 9cc876d3..00000000 --- a/documentation/functions_aggregate.md +++ /dev/null @@ -1,171 +0,0 @@ -[Back to index](./README.md) - -# Aggregate Functions - -**Navigation:** [Functions — Date / Time](./functions_date_time.md) · [Functions — Conditional](./functions_conditional.md) - -This page documents aggregate functions. - ---- - -### Function: COUNT -**Description:** -Count rows or non-null expressions. -With `DISTINCT` counts distinct values. - -**Inputs:** -- `expr` or `*`; optional `DISTINCT` - -**Output:** -- `BIGINT` - -**Example:** -```sql -SELECT COUNT(*) AS total FROM emp; --- Result: total = 42 - -SELECT COUNT(DISTINCT salary) AS distinct_salaries FROM emp; --- Result: 8 -``` - ---- - -### Function: SUM -**Description:** -Sum of values. - -**Inputs:** -- `expr` (`NUMERIC`) - -**Output:** -- `NUMERIC` - -**Example:** -```sql -SELECT SUM(salary) AS total_salary FROM emp; -``` - ---- - -### Function: AVG -**Description:** -Average of values. 
- -**Inputs:** -- `expr` (`NUMERIC`) - -**Output:** -- `DOUBLE` - -**Example:** -```sql -SELECT AVG(salary) AS avg_salary FROM emp; -``` - ---- - -### Function: MIN -**Description:** -Minimum value in group. - -**Inputs:** -- `expr` (comparable) - -**Output:** -- same as input - -**Example:** -```sql -SELECT MIN(hire_date) AS earliest FROM emp; -``` - ---- - -### Function: MAX -**Description:** -Maximum value in group. - -**Inputs:** -- `expr` (comparable) - -**Output:** -- same as input - -**Example:** -```sql -SELECT MAX(salary) AS top_salary FROM emp; -``` - ---- - -### Function: FIRST_VALUE -**Description:** -Window: first value ordered by `ORDER BY`. Pushed as `top_hits size=1` to ES when possible. - -**Inputs:** -- `expr` with optional `OVER (PARTITION BY ... ORDER BY ...)` -If `OVER` is not provided, only the expr column name is used for the sorting. - -**Output:** -- same as input - -**Example:** -```sql -SELECT FIRST_VALUE(salary) -OVER ( - PARTITION BY department - ORDER BY hire_date ASC -) AS first_salary -FROM emp; -``` - ---- - -### Function: LAST_VALUE -**Description:** -Window: last value ordered by `ORDER BY. Pushed to ES by flipping sort order in `top_hits`. - -**Inputs:** -- `expr` with optional `OVER (PARTITION BY ... ORDER BY ...)` -If `OVER` is not provided, only the expr column name is used for the sorting. - -**Output:** -- same as input - -**Example:** -```sql -SELECT LAST_VALUE(salary) -OVER ( - PARTITION BY department - ORDER BY hire_date ASC -) AS last_salary -FROM emp; -``` - ---- - -### Function: ARRAY_AGG -**Description:** -Collect values into an array for each partition. Implemented using `OVER` and pushed to ES as `top_hits`. Post-processing converts hits to an array of scalars. - -**Inputs:** -- `expr` with optional `OVER (PARTITION BY ... ORDER BY ... )` -If `OVER` is not provided, only the expr column name is used for the sorting. - -**Output:** -- `ARRAY` - -**Example:** -```sql -SELECT department, -ARRAY_AGG(name) OVER ( - PARTITION BY department - ORDER BY hire_date ASC -) AS employees -FROM emp -LIMIT 100; --- Result: employees as an array of name values --- per department (sorted and limited) -``` - -[Back to index](./README.md) diff --git a/documentation/functions_conditional.md b/documentation/functions_conditional.md deleted file mode 100644 index f02a19fd..00000000 --- a/documentation/functions_conditional.md +++ /dev/null @@ -1,133 +0,0 @@ -[Back to index](./README.md) - -# Conditional Functions - -This page documents conditional expressions. - ---- - -### Function: CASE (searched form) -**Name & Aliases:** `CASE WHEN ... THEN ... ELSE ... END` (searched CASE form) - -**Description:** -Evaluates boolean WHEN expressions in order; returns the result expression corresponding to the first true condition; if none match, returns the ELSE expression (or NULL if ELSE omitted). - -**Inputs:** -- One or more `WHEN condition THEN result` pairs. Optional `ELSE result`. - -**Output:** -- Type coerced from result expressions (THEN/ELSE). - -**Example:** -```sql -SELECT CASE - WHEN salary > 100000 THEN 'very_high' - WHEN salary > 50000 THEN 'high' - ELSE 'normal' - END AS salary_band -FROM emp; --- Result: 'very_high' / 'high' / 'normal' -``` - ---- - -### Function: CASE (simple / expression form) -**Name & Aliases:** `CASE expr WHEN val1 THEN r1 WHEN val2 THEN r2 ... ELSE rN END` (simple CASE) - -**Description:** -Compare `expr` to `valN` sequentially using equality; returns corresponding `rN` for first match; else `ELSE` result or NULL. 
- -**Inputs:** -- `expr` (any comparable type) and pairs `WHEN value THEN result`. - -**Output:** -- Type coerced from result expressions. - -**Example:** -```sql -SELECT CASE department - WHEN 'IT' THEN 'tech' - WHEN 'Sales' THEN 'revenue' - ELSE 'other' - END AS dept_category -FROM emp; --- Result: 'tech', 'revenue', or 'other' depending on department -``` - -**Implementation notes:** -- The simple form evaluates by comparing `expr = value` for each WHEN. -- Both CASE forms are parsed and translated into nested conditional Painless scripts for `script_fields` when used outside an aggregation push-down. - ---- - -### Function: COALESCE -**Description:** -Return first non-null argument. - -**Inputs:** -- `expr1, expr2, ...` - -**Output:** -- Value of first non-null expression (coerced) - -**Example:** -```sql -SELECT COALESCE(nickname, firstname, 'N/A') AS display FROM users; --- Result: 'Jo' or 'John' or 'N/A' -``` - ---- - -### Function: NULLIF -**Description:** -Return NULL if expr1 = expr2; otherwise return expr1. - -**Inputs:** -- `expr1, expr2` - -**Output:** -- Type of `expr1` - -**Example:** -```sql -SELECT NULLIF(status, 'unknown') AS status_norm FROM events; --- Result: NULL if status is 'unknown', else original status -``` - ---- - -### Function: ISNULL -**Description:** -Test nullness. - -**Inputs:** -- `expr` - -**Output:** -- `BOOLEAN` - -**Example:** -```sql -SELECT ISNULL(manager) AS manager_missing FROM emp; --- Result: TRUE if manager is NULL, else FALSE -``` - ---- - -### Function: ISNOTNULL -**Description:** -Test non-nullness. - -**Inputs:** -- `expr` - -**Output:** -- `BOOLEAN` - -**Example:** -```sql -SELECT ISNOTNULL(manager) AS manager_missing FROM emp; --- Result: TRUE if manager is NOT NULL, else FALSE -``` - -[Back to index](./README.md) diff --git a/documentation/functions_date_time.md b/documentation/functions_date_time.md deleted file mode 100644 index b441cafa..00000000 --- a/documentation/functions_date_time.md +++ /dev/null @@ -1,450 +0,0 @@ -[Back to index](./README.md) - -# Date / Time / Datetime / Timestamp / Interval Functions - -**Navigation:** [Aggregate functions](./functions_aggregate.md) · [Operator Precedence](./operator_precedence.md) - -This page documents TEMPORAL functions. - ---- - -### Function: CURRENT_TIMESTAMP (Alias: NOW, CURRENT_DATETIME) -**Description:** -Returns current datetime (ZonedDateTime) in UTC. - -**Inputs:** -- none - -**Output:** -- `TIMESTAMP` / `DATETIME` - -**Example:** -```sql -SELECT CURRENT_TIMESTAMP AS now; --- Result: 2025-09-26T12:34:56Z -``` - ---- - -### Function: CURRENT_DATE (Alias: CURDATE, TODAY) -**Description:** -Returns current date as `DATE`. - -**Inputs:** -- none - -**Output:** -- `DATE` - -**Example:** -```sql -SELECT CURRENT_DATE AS today; --- Result: 2025-09-26 -``` - ---- - -### Function: CURRENT_TIME (Alias: CURTIME) -**Description:** -Returns current time-of-day. - -**Inputs:** -- none - -**Output:** -- `TIME` - -**Example:** -```sql -SELECT CURRENT_TIME AS t; --- Result: 12:34:56 -``` - ---- - -### Function: INTERVAL -**Description:** -Literal syntax for time intervals. - -**Inputs:** -- n (`INT`) -- `UNIT` (`YEAR`|`QUARTER`|`MONTH`|`WEEK`|`DAY`|`HOUR`|`MINUTE`|`SECOND`|`MILLISECOND`|`MICROSECOND`|`NANOSECOND`) - -**Output:** -- `INTERVAL` -- Note: `INTERVAL` is not a standalone type, it can only be used as part of date/datetime arithmetic functions. 
- -**Example:** -```sql -SELECT DATE_ADD('2025-01-10'::DATE, INTERVAL 1 MONTH); --- Result: 2025-02-10 -``` - -### Function: DATE_ADD (Alias: DATEADD) -**Description:** -Adds interval to `DATE`. - -**Inputs:** -- `date_expr` (`DATE`) -- `INTERVAL` n (`INT`) `UNIT` (`YEAR`|`QUARTER`|`MONTH`|`WEEK`|`DAY`) - -**Output:** -- `DATE` - -**Example:** -```sql -SELECT DATE_ADD('2025-01-10'::DATE, INTERVAL 1 MONTH) AS next_month; --- Result: 2025-02-10 -``` - ---- - -### Function: DATE_SUB (Alias: DATESUB) -**Description:** -Subtract interval from `DATE`. - -**Inputs:** -- `date_expr` (`DATE`) -- `INTERVAL` n (`INT`) `UNIT` (`YEAR`|`QUARTER`|`MONTH`|`WEEK`|`DAY`) - -**Output:** -- `DATE` - -**Example:** -```sql -SELECT DATE_SUB('2025-01-10'::DATE, INTERVAL 7 DAY) AS week_before; --- Result: 2025-01-03 -``` - ---- - -### Function: DATETIME_ADD (Alias: DATETIMEADD) -**Description:** -Adds interval to `DATETIME` / `TIMESTAMP` - -**Inputs:** -- `datetime_expr` (`DATETIME`) -- `INTERVAL` n (`INT`) `UNIT` (`YEAR`|`QUARTER`|`MONTH`|`WEEK`|`DAY`|`HOUR`|`MINUTE`|`SECOND`) - -**Output:** -- `DATETIME` - -**Example:** -```sql -SELECT DATETIME_ADD('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 1 DAY) AS tomorrow; --- Result: 2025-01-11T12:00:00Z -``` - ---- - -### Function: DATETIME_SUB (Alias: DATETIMESUB) -**Description:** -Subtract interval from `DATETIME` / `TIMESTAMP`. - -**Inputs:** -- `datetime_expr` -- `INTERVAL` n (`INT`) `UNIT` (`YEAR`|`QUARTER`|`MONTH`|`WEEK`|`DAY`|`HOUR`|`MINUTE`|`SECOND`) - -**Output:** -- `DATETIME` - -**Example:** -```sql -SELECT DATETIME_SUB('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 2 HOUR) AS earlier; --- Result: 2025-01-10T10:00:00Z -``` - ---- - -### Function: DATEDIFF (Alias: DATE_DIFF) -**Description:** -Difference between 2 dates (date1 - date2) in the specified time unit. - -**Inputs:** -- `date1` (`DATE` or `DATETIME`) -- `date2` (`DATE` or `DATETIME`), -- optional `unit` (`YEAR`|`QUARTER`|`MONTH`|`WEEK`|`DAY`|`HOUR`|`MINUTE`|`SECOND`), `DAY` by default - -**Output:** -- `BIGINT` - -**Example:** -```sql -SELECT DATEDIFF('2025-01-10'::DATE, '2025-01-01'::DATE) AS diff; --- Result: 9 -``` - ---- - -### Function: DATE_FORMAT -**Description:** -Format `DATE` to `VARCHAR`. - -**Inputs:** -- `date_expr` (`DATE`) -- `pattern` (`VARCHAR`) -- Note: pattern follows [MySQL-style](#supported-mysql-style-datetime-patterns). - -**Output:** -- `VARCHAR` - -**Example:** -```sql --- Simple date formatting -SELECT DATE_FORMAT('2025-01-10'::DATE, '%Y-%m-%d') AS fmt; --- Result: '2025-01-10' - --- Day of the week (%W) -SELECT DATE_FORMAT('2025-01-10'::DATE, '%W') AS weekday; --- Result: 'Friday' -``` - ---- - -### Function: DATE_PARSE -**Description:** -Parse `VARCHAR` into `DATE`. - -**Inputs:** -- `VARCHAR` -- `pattern` (`VARCHAR`) -- Note: pattern follows [MySQL-style](#supported-mysql-style-datetime-patterns). - -**Output:** -- `DATE` - -**Example:** -```sql --- Parse ISO-style date -SELECT DATE_PARSE('2025-01-10','%Y-%m-%d') AS d; --- Result: 2025-01-10 - --- Parse with day of week (%W) -SELECT DATE_PARSE('Friday 2025-01-10','%W %Y-%m-%d') AS d; --- Result: 2025-01-10 -``` - ---- - -### Function: DATETIME_PARSE -**Description:** -Parse `VARCHAR` into `DATETIME` / `TIMESTAMP`. - -**Inputs:** -- `VARCHAR` -- `pattern` (`VARCHAR`) -- Note: pattern follows [MySQL-style](#supported-mysql-style-datetime-patterns). 
- -**Output:** -- `DATETIME` - -**Example:** -```sql --- Parse full datetime with microseconds (%f) -SELECT DATETIME_PARSE('2025-01-10 12:00:00.123456','%Y-%m-%d %H:%i:%s.%f') AS dt; --- Result: 2025-01-10T12:00:00.123456Z - --- Parse 12-hour clock with AM/PM (%p) -SELECT DATETIME_PARSE('2025-01-10 01:45:30 PM','%Y-%m-%d %h:%i:%s %p') AS dt; --- Result: 2025-01-10T13:45:30Z -``` - ---- - -### Function: DATETIME_FORMAT -**Description:** -Format `DATETIME` / `TIMESTAMP` to `VARCHAR` with pattern. - -**Inputs:** -- `datetime_expr` (`DATETIME` or `TIMESTAMP`) -- `pattern` (`VARCHAR`) -- Note: pattern follows [MySQL-style](#supported-mysql-style-datetime-patterns). - -**Output:** -- `VARCHAR` - -**Example:** -```sql --- Format with seconds and microseconds -SELECT DATETIME_FORMAT('2025-01-10T12:00:00.123456Z'::TIMESTAMP,'%Y-%m-%d %H:%i:%s.%f') AS s; --- Result: '2025-01-10 12:00:00.123456' - --- Format 12-hour clock with AM/PM -SELECT DATETIME_FORMAT('2025-01-10T13:45:30Z'::TIMESTAMP,'%Y-%m-%d %h:%i:%s %p') AS s; --- Result: '2025-01-10 01:45:30 PM' - --- Format with full weekday name -SELECT DATETIME_FORMAT('2025-01-10T13:45:30Z'::TIMESTAMP,'%W, %Y-%m-%d') AS s; --- Result: 'Friday, 2025-01-10' -``` - ---- - -### Function: DATE_TRUNC -**Description:** -Truncate date/datetime to a `unit`. - -**Inputs:** -- `date_or_datetime_expr` (`DATE` or `DATETIME`) -- `unit` (`YEAR`|`QUARTER`|`MONTH`|`WEEK`|`DAY`|`HOUR`|`MINUTE`|`SECOND`) - -**Output:** -- `DATE` or `DATETIME` - -**Example:** -```sql -SELECT DATE_TRUNC('2025-01-15'::DATE, MONTH) AS start_month; --- Result: 2025-01-01 -``` - ---- - -### Function: EXTRACT -**Description:** -Extract field from date or datetime. - -**Inputs:** -- `unit` (`YEAR`|`QUARTER`|`MONTH`|`WEEK`|`DAY`|`HOUR`|`MINUTE`|`SECOND`) `FROM` `date_expr` (`DATE` or `DATETIME`) - -**Output:** -- `INT` / `BIGINT` - -**Example:** -```sql -SELECT EXTRACT(YEAR FROM '2025-01-10T12:00:00Z'::TIMESTAMP) AS y; --- Result: 2025 -``` - ---- - -### Function: LAST_DAY -**Description:** -Last day of month for a date. - -**Inputs:** -- `date_expr` (`DATE`) - -**Output:** -- `DATE` - -**Example:** -```sql -SELECT LAST_DAY('2025-02-15'::DATE) AS ld; --- Result: 2025-02-28 -``` - ---- - -### Function: WEEK -**Description:** -ISO week number (1..53) - -**Inputs:** -- `date_expr` (`DATE`) - -**Output:** -- `INT` - -**Example:** -```sql -SELECT WEEK('2025-01-01'::DATE) AS w; --- Result: 1 -``` - ---- - -### Function: QUARTER -**Description:** -Quarter number (1..4) - -**Inputs:** -- `date_expr` (`DATE`) - -**Output:** -- `INT` - -**Example:** -```sql -SELECT QUARTER('2025-05-10'::DATE) AS q; --- Result: 2 -``` - ---- - -### Function: NANOSECOND / MICROSECOND / MILLISECOND -**Description:** -Sub-second extraction. - -**Inputs:** -- `datetime_expr` (`DATETIME` or `TIMESTAMP`) - -**Output:** -- `INT` - -**Example:** -```sql -SELECT MILLISECOND('2025-01-01T12:00:00.123Z'::TIMESTAMP) AS ms; --- Result: 123 -``` - ---- - -### Function: EPOCHDAY -**Description:** -Days since epoch. - -**Inputs:** -- `date_expr` (`DATE`) - -**Output:** -- `BIGINT` - -**Example:** -```sql -SELECT EPOCHDAY('1970-01-02'::DATE) AS d; --- Result: 1 -``` - ---- - -### Function: OFFSET_SECONDS -**Description:** -Timezone offset in seconds. 
- -**Inputs:** -- `timestamp_expr` (`TIMESTAMP` with timezone) - -**Output:** -- `INT` - -**Example:** -```sql -SELECT OFFSET_SECONDS('2025-01-01T12:00:00+02:00'::TIMESTAMP) AS off; --- Result: 7200 -``` - ---- - -### Supported MySQL-style Date/Time Patterns - -| Pattern | Description | Example Output | -|---------|------------------------------|----------------| -| `%Y` | Year (4 digits) | `2025` | -| `%y` | Year (2 digits) | `25` | -| `%m` | Month (2 digits) | `01` | -| `%c` | Month (1–12) | `1` | -| `%M` | Month name (full) | `January` | -| `%b` | Month name (abbrev) | `Jan` | -| `%d` | Day of month (2 digits) | `10` | -| `%e` | Day of month (1–31) | `9` | -| `%W` | Weekday name (full) | `Friday` | -| `%a` | Weekday name (abbrev) | `Fri` | -| `%H` | Hour (00–23) | `13` | -| `%h` | Hour (01–12) | `01` | -| `%I` | Hour (01–12, synonym for %h) | `01` | -| `%i` | Minutes (00–59) | `45` | -| `%s` | Seconds (00–59) | `30` | -| `%f` | Microseconds (000–999) | `123` | -| `%p` | AM/PM marker | `AM` / `PM` | - -[Back to index](./README.md) diff --git a/documentation/functions_geo.md b/documentation/functions_geo.md deleted file mode 100644 index dc62f499..00000000 --- a/documentation/functions_geo.md +++ /dev/null @@ -1,44 +0,0 @@ -[Back to index](./README.md) - -# Geo Functions - ---- - -### Function: ST_DISTANCE (Alias: DISTANCE) -**Description:** - -Computes the geodesic distance (great-circle distance) in meters between two points. - -**Inputs:** - -Each point can be: -- A column of type `geo_point` in Elasticsearch -- A literal defined with `POINT(lat, lon)` - -If both arguments are fixed points, the distance is **precomputed at query compilation time**. - -**Output:** -- `DOUBLE` (distance in meters) - -**Examples:** - -- Distance between a fixed point and a field -```sql - SELECT ST_DISTANCE(POINT(-70.0, 40.0), toLocation) AS d - FROM locations; -``` -- Distance between two fields -```sql -SELECT ST_DISTANCE(fromLocation, toLocation) AS d -FROM locations; -``` -- Distance between two fixed points (precomputed) -```sql -SELECT ST_DISTANCE( - POINT(-70.0, 40.0), - POINT(0.0, 0.0) -) AS d; - -- Precomputed result: 8318612.0 (meters) -``` - -[Back to index](./README.md) diff --git a/documentation/functions_math.md b/documentation/functions_math.md deleted file mode 100644 index d5748d83..00000000 --- a/documentation/functions_math.md +++ /dev/null @@ -1,184 +0,0 @@ -[Back to index](./README.md) - -# Mathematical Functions - -**Navigation:** [Functions — Aggregate](./functions_aggregate.md) · [Functions — String](./functions_string.md) - ---- - -### Function: ABS -**Description:** -Absolute value. - -**Inputs:** -- `x` (`NUMERIC`) - -**Output:** -- `NUMERIC` - -**Example:** -```sql -SELECT ABS(-5) AS a; --- Result: 5 -``` - -### Function: ROUND -**Description:** -Round to n decimals (optional). - -**Inputs:** `x` (`NUMERIC`), optional `n` (`INT`) - -**Output:** -- `DOUBLE` - -**Example:** -```sql -SELECT ROUND(123.456, 2) AS r; --- Result: 123.46 -``` - -### Function: FLOOR -**Description:** -Greatest `BIGINT` ≤ x. - -**Inputs:** -- `x` (`NUMERIC`) - -**Output:** -- `BIGINT` - -**Example:** -```sql -SELECT FLOOR(3.9) AS f; --- Result: 3 -``` - -### Function: CEIL (Alias: CEILING) -**Description:** -Smallest `BIGINT` ≥ x. - -**Inputs:** -- `x` (`NUMERIC`) - -**Output:** -- `BIGINT` - -**Example:** -```sql -SELECT CEIL(3.1) AS c; --- Result: 4 -``` - -### Function: POWER (Alias: POW) -**Description:** -x^y. 
- -**Inputs:** -- `x` (`NUMERIC`), `y` (`NUMERIC`) - -**Output:** -- `NUMERIC` - -**Example:** -```sql -SELECT POWER(2, 10) AS p; --- Result: 1024 -``` - -### Function: SQRT -**Description:** -Square root. - -**Inputs:** -- `x` (`NUMERIC` >= 0) - -**Output:** -- `NUMERIC` - -**Example:** -```sql -SELECT SQRT(16) AS s; --- Result: 4 -``` - -### Function: LOG (Alias: LN) -**Description:** -Natural logarithm. - -**Inputs:** -- `x` (`NUMERIC` > 0) - -**Output:** -- `NUMERIC` - -**Example:** -```sql -SELECT LOG(EXP(1)) AS l; --- Result: 1 -``` - -### Function: LOG10 -**Description:** -Base-10 logarithm. - -**Inputs:** -- `x` (`NUMERIC` > 0) - -**Output:** -- `NUMERIC` - -**Example:** -```sql -SELECT LOG10(1000) AS l10; --- Result: 3 -``` - -### Function: EXP -**Description:** -e^x. - -**Inputs:** -- `x` (`NUMERIC`) - -**Output:** -- `NUMERIC` - -**Example:** -```sql -SELECT EXP(1) AS e; --- Result: 2.71828... -``` - -### Function: SIGN (Alias SGN) -**Description:** -Returns -1, 0, or 1 according to sign. - -**Inputs:** -- `x` (`NUMERIC`) - -**Output:** -- `TINYINT` - -**Example:** -```sql -SELECT SIGN(-10) AS s; --- Result: -1 -``` - -### Trigonometric functions: COS, ACOS, SIN, ASIN, TAN, ATAN, ATAN2 -**Description:** -Standard trigonometric functions. Inputs in radians. - -**Inputs:** -- `x` or (`y`, `x` for ATAN2) - -**Output:** -- `DOUBLE` - -**Example:** -```sql -SELECT COS(PI()/3) AS c; --- Result: 0.5 -``` - -[Back to index](./README.md) diff --git a/documentation/functions_string.md b/documentation/functions_string.md deleted file mode 100644 index 18be5214..00000000 --- a/documentation/functions_string.md +++ /dev/null @@ -1,260 +0,0 @@ -[Back to index](./README.md) - -# String Functions - ---- - -### Function: UPPER (Alias: UCASE) -**Description:** -Convert string to upper case. - -**Inputs:** -- `str` (`VARCHAR`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT UPPER('hello') AS up; --- Result: 'HELLO' -``` - -### Function: LOWER (Alias: LCASE) -**Description:** -Convert string to lower case. - -**Inputs:** -- `str` (`VARCHAR`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT LOWER('Hello') AS lo; --- Result: 'hello' -``` - -### Function: TRIM -**Description:** -Trim whitespace both sides. - -**Inputs:** -- `str` (`VARCHAR`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT TRIM(' abc ') AS t; --- Result: 'abc' -``` - -### Function: LTRIM -**Description:** -Trim whitespace left side. - -**Inputs:** -- `str` (`VARCHAR`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT LTRIM(' abc ') AS t; --- Result: 'abc ' -``` - -### Function: RTRIM -**Description:** -Trim whitespace right side. - -**Inputs:** -- `str` (`VARCHAR`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT RTRIM(' abc ') AS t; --- Result: ' abc' -``` - -### Function: LENGTH (Alias: LEN) -**Description:** -Character length. - -**Inputs:** -- `str` (`VARCHAR`) - -**Output:** -- `BIGINT` - -**Example:** -```sql -SELECT LENGTH('abc') AS l; --- Result: 3 -``` - -### Function: SUBSTRING (Alias: SUBSTR) -**Description:** -SQL 1-based substring. - -**Inputs:** -- `str` (`VARCHAR`) `,`|`FROM` `start` (`INT` >= 1) optional `,`|`FOR` `length` (`INT`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT SUBSTRING('abcdef', 2, 3) AS s; --- Result: 'bcd' - -SELECT SUBSTRING('abcdef' FROM 2 FOR 3) AS s; --- Result: 'bcd' - -SELECT SUBSTRING('abcdef' FROM 4) AS s; --- Result: 'def' -``` - -### Function: LEFT -**Description:** -Returns the leftmost characters from a string. 
- -**Inputs:** -- `str` (`VARCHAR`) `,`|`FOR` `length` (`INT`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT LEFT('abcdef', 3) AS l; --- Result: 'abc' -``` - -### Function: RIGHT -**Description:** -Returns the rightmost characters from a string. -If `length` exceeds the string size, the implementation returns the full string. -If `length = 0`, an empty string is returned. -If `length < 0`, a validation error is raised. - -**Inputs:** -- `str` (`VARCHAR`) `,`|`FOR` `length` (`INT`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT RIGHT('abcdef', 3) AS r; --- Result: 'def' - -SELECT RIGHT('abcdef' FOR 10) AS r; --- Result: 'abcdef' -``` - -### Function: CONCAT -**Description:** -Concatenate values into a string. - -**Inputs:** -- `expr1, expr2, ...` (coercible to `VARCHAR`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT CONCAT(firstName, ' ', lastName) AS full FROM users; -``` - -### Function: REPLACE -**Description:** -Replaces all occurrences of a substring with another substring. - -**Inputs:** -- `str, search, replace` - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT REPLACE('Mr. John', 'Mr. ', '') AS r; --- Result: 'John' -``` - -### Function: REVERSE -**Description:** -Reverses the characters in a string. - -**Inputs:** -- `str` (`VARCHAR`) - -**Output:** -- `VARCHAR` - -**Example:** -```sql -SELECT REVERSE('abcdef') AS r; --- Result: 'fedcba' -``` - -### Function: POSITION (Alias: STRPOS) -**Description:** -Returns the 1-based position of the first occurrence of a substring in a string. -If the substring is not found, returns 0. -An optional FROM position (1-based) can be provided to start the search. - -**Inputs:** -- `substr` `,` | `IN` `str` optional `,` | `FROM` `INT` - -**Output:** -- `BIGINT` - -**Example:** -```sql -SELECT POSITION('lo', 'hello') AS pos; --- Result: 4 - -SELECT POSITION('a' IN 'Elasticsearch' FROM 5) AS pos; --- Result: 10 - -SELECT POSITION('z' IN 'Elasticsearch') AS pos; --- Result: 0 -``` - -### Function: REGEXP_LIKE (Alias: REGEXP) -**Description:** -`REGEXP_LIKE(string, pattern [, match_param])` - -Returns `TRUE` if the input string matches the regular expression `pattern`. -By default, the match is case-sensitive. - -**Inputs:** -- `string`: The input string to test. -- `pattern`: A regular expression pattern. -- `match_param` *(optional)*: A string controlling the regex matching behavior. - - `'i'`: Case-insensitive match. - - `'c'`: Case-sensitive match (default). - - `'m'`: Multi-line mode. - - `'n'`: Allows the `.` to match newline characters. - -**Output:** -- `BOOLEAN` - -**Examples:** -```sql -SELECT REGEXP_LIKE('Hello', 'HEL'); -- false -SELECT REGEXP_LIKE('Hello', 'HEL', 'i'); -- true -SELECT REGEXP_LIKE('abc\nxyz', '^xyz', 'm') -- true -``` - -[Back to index](./README.md) diff --git a/documentation/functions_type_conversion.md b/documentation/functions_type_conversion.md deleted file mode 100644 index 7040d495..00000000 --- a/documentation/functions_type_conversion.md +++ /dev/null @@ -1,44 +0,0 @@ -[Back to index](./README.md) - -# Type Conversion Functions - ---- - -### Function: CAST (Alias: CONVERT) -**Description:** - -Cast expression to a target SQL type. - -**Inputs:** -- `expr` -- `TYPE` (`DATE`, `TIMESTAMP`, `VARCHAR`, `INT`, `DOUBLE`, etc.) - -**Output:** -- `TYPE` - -**Example:** -```sql -SELECT CAST(salary AS DOUBLE) AS s FROM emp; --- Result: 12345.0 -``` - -### Function: TRY_CAST (Alias: SAFE_CAST) -**Description:** - -Attempt a cast and return NULL on failure (safer alternative). 
- -**Inputs:** -- `expr` -- `TYPE` (`DATE`, `TIMESTAMP`, `VARCHAR`, `INT`, `DOUBLE`, etc.) - -**Output:** - -- `TYPE`or `NULL` - -**Example:** -```sql -SELECT TRY_CAST('not-a-number' AS INT) AS maybe_null; --- Result: NULL -``` - -[Back to index](./README.md) diff --git a/documentation/operator_precedence.md b/documentation/operator_precedence.md deleted file mode 100644 index 8c3697db..00000000 --- a/documentation/operator_precedence.md +++ /dev/null @@ -1,24 +0,0 @@ -[Back to index](./README.md) - -# Operator Precedence - -This page lists operator precedence used by the parser and evaluator (highest precedence at top). - -1. Parentheses `(...)` -2. Unary operators: `-` (negation), `+` (unary plus), `NOT` -3. Multiplicative: `*`, `/`, `%` -4. Additive: `+`, `-` -5. Comparison: `<`, `<=`, `>`, `>=` -6. Equality: `=`, `!=`, `<>` -7. Membership & pattern: `BETWEEN`, `IN`, `LIKE`, `RLIKE` -8. Logical `AND` -9. Logical `OR` - -**Notes and examples** -```sql -SELECT 1 + 2 * 3 AS v; -- v = 7 -SELECT (1 + 2) * 3 AS v; -- v = 9 -SELECT a BETWEEN 1 AND 3 OR b = 5; -- interpreted as (a BETWEEN 1 AND 3) OR (b = 5) -``` - -[Back to index](./README.md) diff --git a/documentation/operators.md b/documentation/operators.md deleted file mode 100644 index 52aa9fc6..00000000 --- a/documentation/operators.md +++ /dev/null @@ -1,306 +0,0 @@ -[Back to index](./README.md) - -# Operators (detailed) - -**Navigation:** [Query Structure](./request_structure.md) · [Operator Precedence](./operator_precedence.md) · [Keywords](./keywords.md) - -This file provides a per-operator description and a concrete SQL example for each operator supported by the engine. - ---- - -### Math operators - -#### Operator: `+` -**Description:** - -Arithmetic addition. - -**Example:** -```sql -SELECT salary + bonus AS total_comp FROM emp; --- result example: if salary=50000 and bonus=10000 -> total_comp = 60000 -``` - -#### Operator: `-` -**Description:** - -Arithmetic subtraction or unary negation when used with single operand. - -**Example:** -```sql -SELECT salary - tax AS net FROM emp; -SELECT -balance AS negative_balance FROM accounts; -``` - -#### Operator: `*` -**Description:** - -Multiplication. - -**Example:** -```sql -SELECT quantity * price AS revenue FROM sales; -``` - -#### Operator: `/` -**Description:** - -Division; division by zero must be guarded (NULLIF), engine returns NULL for invalid arithmetic. - -**Example:** -```sql -SELECT total / NULLIF(count, 0) AS avg FROM table; -``` - -#### Operator: `%` (MOD) -**Description:** - -Remainder/modulo operator. - -**Example:** -```sql -SELECT id % 10 AS bucket FROM users; -``` - ---- - -### Comparison operators - -#### Operator: `=` -**Description:** - -Equality comparison. - -**Return type:** - -- `BOOLEAN` - -**Example:** -```sql -SELECT * FROM emp WHERE department = 'IT'; -``` - -#### Operator: `<>`, `!=` -**Description:** - -Inequality comparison (both synonyms supported). - -**Return type:** - -- `BOOLEAN` - -**Example:** -```sql -SELECT * FROM emp WHERE status <> 'terminated'; -``` - -#### Operator: `<`, `<=`, `>`, `>=` -**Description:** - -Relational comparisons. - -**Return type:** - -- `BOOLEAN` - -**Example:** -```sql -SELECT * FROM emp WHERE age >= 21 AND age < 65; -``` - -#### Operator: `IN` -**Description:** - -Membership in a set of literal or numeric values or results of subquery (subquery support depends on implementation). 
- -**Return type:** - -- `BOOLEAN` - -**Example:** -```sql -SELECT * FROM emp WHERE department IN ('Sales', 'IT', 'HR'); -SELECT * FROM emp WHERE status IN (1, 2); -``` - -#### Operator: `NOT IN` -**Description:** - -Negated membership. - -**Return type:** - -- `BOOLEAN` - -**Example:** -```sql -SELECT * FROM emp WHERE department NOT IN ('HR','Legal'); -``` - -#### Operator: `BETWEEN ... AND ...` - -**Description:** - -Checks if an expression lies between two boundaries (inclusive). - -For numeric expressions, `BETWEEN` works as standard SQL. - -For distance expressions (`ST_DISTANCE`), it supports units (`m`, `km`, `mi`, etc.). - -**Return type:** - -- `BOOLEAN` - -**Examples:** - -- Numeric BETWEEN -```sql -SELECT age -FROM users -WHERE age BETWEEN 18 AND 30; -``` - -- Temporal BETWEEN -```sql -SELECT * -FROM users -WHERE createdAt BETWEEN CURRENT_DATE - INTERVAL 1 MONTH AND CURRENT_DATE -AND -lastUpdated BETWEEN LAST_DAY('2025-09-11'::DATE) AND DATE_TRUNC(CURRENT_TIMESTAMP, DAY) -``` - -- Distance BETWEEN (using meters) - -```sql -SELECT id -FROM locations -WHERE ST_DISTANCE(POINT(-70.0, 40.0), toLocation) -BETWEEN 4000 AND 5000; -``` - -- Distance BETWEEN (with explicit units) - -```sql -SELECT id -FROM locations -WHERE ST_DISTANCE(POINT(-70.0, 40.0), toLocation) BETWEEN 4000 km AND 5000 km; -``` - -👉 In Elasticsearch translation, the last 2 examples are optimized into a combination of: -- a **script filter** for the lower bound -- a `geo_distance` **query** for the upper bound (native ES optimization) - -#### Operator: `IS NULL` -**Description:** - -Null check predicate. - -**Return type:** - -- `BOOLEAN` - -**Example:** -```sql -SELECT * FROM emp WHERE manager IS NULL; -``` - -#### Operator: `IS NOT NULL` -**Description:** - -Negated null check. - -**Return type:** - -- `BOOLEAN` - -**Example:** -```sql -SELECT * FROM emp WHERE manager IS NOT NULL; -``` - -#### Operator: `LIKE` -**Description:** - -Pattern match using `%` and `_`. Engine converts `%` → `.*` and `_` → `.` for underlying regex matching. - -**Return type:** - -- `BOOLEAN` - -**Example:** -```sql -SELECT * FROM emp WHERE name LIKE 'Jo%'; -``` - -#### Operator: `RLIKE` -**Description:** - -Regular-expression match (Java regex semantics). - -**Return type:** - -- `BOOLEAN` - -**Example:** -```sql -SELECT * FROM users WHERE email RLIKE '.*@example\.com$'; -``` - ---- - -### Logical operators - -#### Operator: `AND` -**Description:** - -Logical conjunction. - -**Example:** -```sql -SELECT * FROM emp WHERE dept = 'IT' AND salary > 50000; -``` - -#### Operator: `OR` -**Description:** - -Logical disjunction. - -**Example:** -```sql -SELECT * FROM emp WHERE dept = 'IT' OR dept = 'Sales'; -``` - -#### Operator: `NOT` -**Description:** - -Logical negation. - -**Example:** -```sql -SELECT * FROM emp WHERE NOT active; -``` - ---- - -### Cast operators - -#### Operator : `::` - -**Description:** - -Provides an alternative syntax to the [CAST](./functions_type_conversion.md#function-cast-aliases-convert) function. - -**Inputs:** -- `expr` -- `TYPE` (`DATE`, `TIMESTAMP`, `VARCHAR`, `INT`, `DOUBLE`, etc.) 
- -**Return type:** - -- `TYPE` - -**Examples:** -```sql -SELECT hire_date::DATE FROM emp; -``` - -[Back to index](./README.md) diff --git a/documentation/README.md b/documentation/sql/README.md similarity index 100% rename from documentation/README.md rename to documentation/sql/README.md diff --git a/documentation/sql/functions_aggregate.md b/documentation/sql/functions_aggregate.md new file mode 100644 index 00000000..aa755dfe --- /dev/null +++ b/documentation/sql/functions_aggregate.md @@ -0,0 +1,1235 @@ +[Back to index](README.md) + +# Aggregate Functions + +**Navigation:** [Functions — Date / Time](functions_date_time.md) · [Functions — Conditional](functions_conditional.md) + +This page documents aggregate functions for summarizing and analyzing data. + +--- + +## Table of Contents + +1. [COUNT](#function-count) +2. [SUM](#function-sum) +3. [AVG](#function-avg) +4. [MIN](#function-min) +5. [MAX](#function-max) +6. [FIRST_VALUE](#function-first_value) +7. [LAST_VALUE](#function-last_value) +8. [ARRAY_AGG](#function-array_agg) + +--- + +## Overview + +Aggregate functions perform calculations on sets of rows and return a single result. They are commonly used with `GROUP BY` clauses to summarize data by categories. + +**Key Concepts:** +- **Aggregate Functions**: Operate on multiple rows to produce a single result +- **GROUP BY**: Groups rows that have the same values in specified columns +- **HAVING**: Filters groups based on aggregate conditions +- **Window Functions**: Perform calculations across rows related to the current row + +--- + +## Function: COUNT + +**Description:** +Count rows or non-null expressions. With `DISTINCT` counts distinct values. + +**Syntax:** +```sql +COUNT(*) +COUNT(expr) +COUNT(DISTINCT expr) +``` + +**Inputs:** +- `*` - Count all rows (including NULLs) +- `expr` - Count non-NULL values in expression +- `DISTINCT expr` - Count distinct non-NULL values + +**Output:** +- `BIGINT` - Number of rows/values + +**NULL Handling:** +- `COUNT(*)` includes rows with NULL values +- `COUNT(expr)` excludes NULL values +- `COUNT(DISTINCT expr)` excludes NULL values + +**Examples:** + +**Basic COUNT:** +```sql +-- Count all rows +SELECT COUNT(*) AS total FROM emp; +-- Result: total = 42 + +-- Count non-NULL values +SELECT COUNT(manager) AS employees_with_manager FROM emp; + +-- Count with WHERE +SELECT COUNT(*) AS it_employees +FROM emp +WHERE department = 'IT'; +``` + +**COUNT DISTINCT:** +```sql +-- Count distinct salaries +SELECT COUNT(DISTINCT salary) AS distinct_salaries FROM emp; +-- Result: 8 + +-- Count distinct departments +SELECT COUNT(DISTINCT department) AS dept_count FROM emp; + +-- Count distinct non-NULL emails +SELECT COUNT(DISTINCT email) AS unique_emails FROM users; +``` + +**COUNT with GROUP BY:** +```sql +-- Count employees per department +SELECT + department, + COUNT(*) AS employee_count +FROM emp +GROUP BY department; + +-- Count orders per customer +SELECT + customer_id, + COUNT(*) AS order_count +FROM orders +GROUP BY customer_id; + +-- Count with multiple columns +SELECT + department, + job_title, + COUNT(*) AS count +FROM emp +GROUP BY department, job_title; +``` + +**COUNT with HAVING:** +```sql +-- Departments with more than 10 employees +SELECT + department, + COUNT(*) AS count +FROM emp +GROUP BY department +HAVING COUNT(*) > 10; + +-- Customers with multiple orders +SELECT + customer_id, + COUNT(*) AS order_count +FROM orders +GROUP BY customer_id +HAVING COUNT(*) > 1; +``` + +**COUNT vs COUNT(*):** +```sql +-- COUNT(*) counts all rows +SELECT 
COUNT(*) AS total_rows FROM emp; +-- Result: 42 (includes rows with NULL values) + +-- COUNT(column) counts non-NULL values +SELECT COUNT(email) AS rows_with_email FROM emp; +-- Result: 38 (excludes 4 NULL emails) + +-- Difference shows NULL count +SELECT + COUNT(*) AS total, + COUNT(email) AS with_email, + COUNT(*) - COUNT(email) AS without_email +FROM emp; +``` + +**Practical Examples:** + +**1. Data quality check:** +```sql +SELECT + COUNT(*) AS total_records, + COUNT(email) AS records_with_email, + COUNT(phone) AS records_with_phone, + COUNT(CASE WHEN email IS NOT NULL AND phone IS NOT NULL THEN 1 END) AS complete_records +FROM contacts; +``` + +**2. Completion rate:** +```sql +SELECT + department, + COUNT(*) AS total, + COUNT(performance_review) AS reviewed, + ROUND(COUNT(performance_review) * 100.0 / COUNT(*), 2) AS review_completion_rate +FROM employees +GROUP BY department; +``` + +**3. Active users:** +```sql +SELECT + DATE_TRUNC('month', login_date) AS month, + COUNT(DISTINCT user_id) AS active_users +FROM user_logins +WHERE login_date >= DATE_SUB(CURRENT_DATE, INTERVAL 6 MONTH) +GROUP BY DATE_TRUNC('month', login_date) +ORDER BY month; +``` + +--- + +## Function: SUM + +**Description:** +Sum of values. + +**Syntax:** +```sql +SUM(expr) +SUM(DISTINCT expr) +``` + +**Inputs:** +- `expr` - Numeric expression (`INT`, `DOUBLE`, `DECIMAL`, etc.) +- `DISTINCT expr` - Sum of distinct values only + +**Output:** +- `NUMERIC` - Sum of values (same type as input, or promoted) + +**NULL Handling:** +- NULL values are ignored +- If all values are NULL, returns NULL +- Empty set returns NULL + +**Examples:** + +**Basic SUM:** +```sql +-- Total salary +SELECT SUM(salary) AS total_salary FROM emp; + +-- Total revenue +SELECT SUM(amount) AS total_revenue FROM sales; + +-- Sum with WHERE +SELECT SUM(salary) AS it_total_salary +FROM emp +WHERE department = 'IT'; +``` + +**SUM with GROUP BY:** +```sql +-- Total salary per department +SELECT + department, + SUM(salary) AS total_salary +FROM emp +GROUP BY department; + +-- Revenue per product +SELECT + product_id, + SUM(quantity * price) AS total_revenue +FROM order_items +GROUP BY product_id; + +-- Monthly sales +SELECT + DATE_TRUNC('month', order_date) AS month, + SUM(total_amount) AS monthly_revenue +FROM orders +GROUP BY DATE_TRUNC('month', order_date) +ORDER BY month; +``` + +**SUM DISTINCT:** +```sql +-- Sum of distinct salaries (removes duplicates) +SELECT SUM(DISTINCT salary) AS sum_distinct_salaries FROM emp; + +-- Sum of unique prices +SELECT SUM(DISTINCT price) AS sum_unique_prices FROM products; +``` + +**SUM with Calculations:** +```sql +-- Total compensation (salary + bonus) +SELECT + department, + SUM(salary + COALESCE(bonus, 0)) AS total_compensation +FROM emp +GROUP BY department; + +-- Total order value with tax +SELECT + SUM(subtotal + tax + shipping) AS total_order_value +FROM orders; + +-- Weighted average preparation +SELECT + SUM(score * weight) AS weighted_sum, + SUM(weight) AS total_weight +FROM test_scores; +``` + +**SUM with CASE:** +```sql +-- Conditional sum +SELECT + SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END) AS completed_revenue, + SUM(CASE WHEN status = 'pending' THEN amount ELSE 0 END) AS pending_revenue +FROM orders; + +-- Sum by category +SELECT + department, + SUM(CASE WHEN gender = 'M' THEN salary ELSE 0 END) AS male_total, + SUM(CASE WHEN gender = 'F' THEN salary ELSE 0 END) AS female_total +FROM emp +GROUP BY department; +``` + +**Practical Examples:** + +**1. 
Financial summary:** +```sql +SELECT + DATE_TRUNC('quarter', order_date) AS quarter, + SUM(total_amount) AS revenue, + SUM(cost) AS expenses, + SUM(total_amount - cost) AS profit +FROM orders +GROUP BY DATE_TRUNC('quarter', order_date) +ORDER BY quarter; +``` + +**2. Inventory value:** +```sql +SELECT + category, + SUM(quantity * unit_price) AS inventory_value +FROM inventory +GROUP BY category +ORDER BY inventory_value DESC; +``` + +**3. Running total (with window function):** +```sql +SELECT + order_date, + amount, + SUM(amount) OVER (ORDER BY order_date) AS running_total +FROM orders +ORDER BY order_date; +``` + +--- + +## Function: AVG + +**Description:** +Average of values. + +**Syntax:** +```sql +AVG(expr) +AVG(DISTINCT expr) +``` + +**Inputs:** +- `expr` - Numeric expression +- `DISTINCT expr` - Average of distinct values only + +**Output:** +- `DOUBLE` - Average value + +**NULL Handling:** +- NULL values are ignored +- If all values are NULL, returns NULL +- Empty set returns NULL + +**Examples:** + +**Basic AVG:** +```sql +-- Average salary +SELECT AVG(salary) AS avg_salary FROM emp; + +-- Average with WHERE +SELECT AVG(salary) AS avg_it_salary +FROM emp +WHERE department = 'IT'; + +-- Average order value +SELECT AVG(total_amount) AS avg_order_value FROM orders; +``` + +**AVG with GROUP BY:** +```sql +-- Average salary per department +SELECT + department, + AVG(salary) AS avg_salary +FROM emp +GROUP BY department; + +-- Average rating per product +SELECT + product_id, + AVG(rating) AS avg_rating, + COUNT(*) AS review_count +FROM reviews +GROUP BY product_id; + +-- Average daily sales +SELECT + DATE_TRUNC('day', order_date) AS day, + AVG(total_amount) AS avg_daily_order +FROM orders +GROUP BY DATE_TRUNC('day', order_date) +ORDER BY day; +``` + +**AVG DISTINCT:** +```sql +-- Average of distinct salaries +SELECT AVG(DISTINCT salary) AS avg_distinct_salary FROM emp; + +-- Average of unique prices +SELECT + category, + AVG(DISTINCT price) AS avg_unique_price +FROM products +GROUP BY category; +``` + +**AVG with Rounding:** +```sql +-- Round average to 2 decimals +SELECT + department, + ROUND(AVG(salary), 2) AS avg_salary +FROM emp +GROUP BY department; + +-- Format as currency +SELECT + CONCAT('$', ROUND(AVG(salary), 2)) AS avg_salary_formatted +FROM emp; +``` + +**AVG vs Manual Calculation:** +```sql +-- Using AVG function +SELECT AVG(salary) AS avg_salary FROM emp; + +-- Manual calculation (equivalent) +SELECT SUM(salary) / COUNT(salary) AS avg_salary FROM emp; + +-- Difference with NULL handling +SELECT + AVG(bonus) AS avg_with_function, + SUM(bonus) / COUNT(*) AS avg_manual_all, + SUM(bonus) / COUNT(bonus) AS avg_manual_non_null +FROM emp; +``` + +**Practical Examples:** + +**1. Performance metrics:** +```sql +SELECT + employee_id, + AVG(sales_amount) AS avg_sale, + AVG(customer_rating) AS avg_rating, + COUNT(*) AS total_sales +FROM sales +WHERE sale_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 MONTH) +GROUP BY employee_id +HAVING AVG(sales_amount) > 1000; +``` + +**2. Grade analysis:** +```sql +SELECT + course_id, + AVG(score) AS avg_score, + MIN(score) AS min_score, + MAX(score) AS max_score, + COUNT(*) AS student_count +FROM exam_results +GROUP BY course_id +ORDER BY avg_score DESC; +``` + +**3. 
Response time analysis:** +```sql +SELECT + service_name, + AVG(response_time_ms) AS avg_response, + PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY response_time_ms) AS p95_response +FROM api_logs +WHERE log_date >= CURRENT_DATE - INTERVAL 1 DAY +GROUP BY service_name; +``` + +--- + +## Function: MIN + +**Description:** +Minimum value in group. + +**Syntax:** +```sql +MIN(expr) +``` + +**Inputs:** +- `expr` - Any comparable type (numeric, string, date, etc.) + +**Output:** +- Same type as input + +**NULL Handling:** +- NULL values are ignored +- If all values are NULL, returns NULL +- Empty set returns NULL + +**Examples:** + +**Basic MIN:** +```sql +-- Minimum salary +SELECT MIN(salary) AS min_salary FROM emp; + +-- Earliest hire date +SELECT MIN(hire_date) AS earliest FROM emp; + +-- Lowest price +SELECT MIN(price) AS lowest_price FROM products; +``` + +**MIN with Different Types:** +```sql +-- Numeric MIN +SELECT MIN(age) AS youngest FROM users; + +-- Date MIN +SELECT MIN(order_date) AS first_order FROM orders; + +-- String MIN (alphabetically first) +SELECT MIN(name) AS first_alphabetically FROM products; + +-- Timestamp MIN +SELECT MIN(created_at) AS earliest_record FROM logs; +``` + +**MIN with GROUP BY:** +```sql +-- Minimum salary per department +SELECT + department, + MIN(salary) AS min_salary +FROM emp +GROUP BY department; + +-- Earliest order per customer +SELECT + customer_id, + MIN(order_date) AS first_order_date +FROM orders +GROUP BY customer_id; + +-- Lowest price per category +SELECT + category, + MIN(price) AS min_price, + MAX(price) AS max_price +FROM products +GROUP BY category; +``` + +**MIN with WHERE:** +```sql +-- Minimum salary in IT department +SELECT MIN(salary) AS min_it_salary +FROM emp +WHERE department = 'IT'; + +-- Earliest order in 2025 +SELECT MIN(order_date) AS first_2025_order +FROM orders +WHERE YEAR(order_date) = 2025; +``` + +**Practical Examples:** + +**1. Find oldest/newest records:** +```sql +SELECT + customer_id, + MIN(order_date) AS first_order, + MAX(order_date) AS last_order, + DATEDIFF(MAX(order_date), MIN(order_date)) AS customer_lifetime_days +FROM orders +GROUP BY customer_id; +``` + +**2. Price range analysis:** +```sql +SELECT + category, + MIN(price) AS min_price, + AVG(price) AS avg_price, + MAX(price) AS max_price, + MAX(price) - MIN(price) AS price_range +FROM products +GROUP BY category; +``` + +**3. Performance bounds:** +```sql +SELECT + server_name, + MIN(response_time) AS best_response, + AVG(response_time) AS avg_response, + MAX(response_time) AS worst_response +FROM server_logs +WHERE log_date = CURRENT_DATE +GROUP BY server_name; +``` + +--- + +## Function: MAX + +**Description:** +Maximum value in group. + +**Syntax:** +```sql +MAX(expr) +``` + +**Inputs:** +- `expr` - Any comparable type (numeric, string, date, etc.) 
+ +**Output:** +- Same type as input + +**NULL Handling:** +- NULL values are ignored +- If all values are NULL, returns NULL +- Empty set returns NULL + +**Examples:** + +**Basic MAX:** +```sql +-- Maximum salary +SELECT MAX(salary) AS top_salary FROM emp; + +-- Latest hire date +SELECT MAX(hire_date) AS most_recent FROM emp; + +-- Highest price +SELECT MAX(price) AS highest_price FROM products; +``` + +**MAX with Different Types:** +```sql +-- Numeric MAX +SELECT MAX(age) AS oldest FROM users; + +-- Date MAX +SELECT MAX(order_date) AS last_order FROM orders; + +-- String MAX (alphabetically last) +SELECT MAX(name) AS last_alphabetically FROM products; + +-- Timestamp MAX +SELECT MAX(updated_at) AS latest_update FROM records; +``` + +**MAX with GROUP BY:** +```sql +-- Maximum salary per department +SELECT + department, + MAX(salary) AS max_salary +FROM emp +GROUP BY department; + +-- Latest order per customer +SELECT + customer_id, + MAX(order_date) AS last_order_date +FROM orders +GROUP BY customer_id; + +-- Highest price per category +SELECT + category, + MAX(price) AS max_price +FROM products +GROUP BY category; +``` + +**MAX with WHERE:** +```sql +-- Maximum salary in IT department +SELECT MAX(salary) AS max_it_salary +FROM emp +WHERE department = 'IT'; + +-- Latest order in 2025 +SELECT MAX(order_date) AS last_2025_order +FROM orders +WHERE YEAR(order_date) = 2025; +``` + +**Practical Examples:** + +**1. Find top performers:** +```sql +SELECT + department, + MAX(salary) AS top_salary, + AVG(salary) AS avg_salary, + MAX(salary) - AVG(salary) AS gap_to_top +FROM emp +GROUP BY department; +``` + +**2. Latest activity:** +```sql +SELECT + user_id, + MAX(login_date) AS last_login, + DATEDIFF(CURRENT_DATE, MAX(login_date)) AS days_since_login +FROM user_logins +GROUP BY user_id +HAVING DATEDIFF(CURRENT_DATE, MAX(login_date)) > 30; +``` + +**3. Peak values:** +```sql +SELECT + DATE_TRUNC('day', timestamp) AS day, + MAX(cpu_usage) AS peak_cpu, + MAX(memory_usage) AS peak_memory, + MAX(active_connections) AS peak_connections +FROM system_metrics +WHERE timestamp >= DATE_SUB(CURRENT_DATE, INTERVAL 7 DAY) +GROUP BY DATE_TRUNC('day', timestamp) +ORDER BY day; +``` + +--- + +## Function: FIRST_VALUE + +**Description:** +Window function: returns the first value in an ordered partition. Pushed as `top_hits size=1` to Elasticsearch when possible. + +**Syntax:** +```sql +FIRST_VALUE(expr) OVER ( + [PARTITION BY partition_expr, ...] + [ORDER BY order_expr [ASC|DESC], ...] 
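+  -- when possible, the engine pushes this down to Elasticsearch as a
+  -- `top_hits` aggregation with size=1 (see Behavior below)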
+) +``` + +**Inputs:** +- `expr` - Expression to return +- `PARTITION BY` - Optional grouping columns +- `ORDER BY` - Ordering specification (if not provided, only expr column name is used for sorting) + +**Output:** +- Same type as input expression + +**Behavior:** +- Returns the first value based on `ORDER BY` within each partition +- If `OVER` is not provided, only the expr column name is used for sorting +- Optimized to Elasticsearch `top_hits` aggregation with `size=1` + +**Examples:** + +**Basic FIRST_VALUE:** +```sql +-- First salary in each department (ordered by hire date) +SELECT + department, + name, + salary, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_salary +FROM emp; +``` + +**Without PARTITION BY:** +```sql +-- First hire across entire company +SELECT + name, + hire_date, + FIRST_VALUE(name) OVER (ORDER BY hire_date ASC) AS first_hired_employee +FROM emp; +``` + +**Without OVER clause:** +```sql +-- Uses expr column name for sorting +SELECT + department, + FIRST_VALUE(salary) AS first_salary_value +FROM emp; +``` + +**Multiple Partitions:** +```sql +-- First employee hired in each department and job title +SELECT + department, + job_title, + name, + hire_date, + FIRST_VALUE(name) OVER ( + PARTITION BY department, job_title + ORDER BY hire_date ASC + ) AS first_in_role +FROM emp; +``` + +**Practical Examples:** + +**1. First purchase per customer:** +```sql +SELECT + customer_id, + order_id, + order_date, + total_amount, + FIRST_VALUE(total_amount) OVER ( + PARTITION BY customer_id + ORDER BY order_date ASC + ) AS first_order_amount +FROM orders; +``` + +**2. Initial stock price:** +```sql +SELECT + stock_symbol, + trade_date, + closing_price, + FIRST_VALUE(closing_price) OVER ( + PARTITION BY stock_symbol + ORDER BY trade_date ASC + ) AS initial_price +FROM stock_prices; +``` + +**3. Baseline metrics:** +```sql +SELECT + server_name, + timestamp, + cpu_usage, + FIRST_VALUE(cpu_usage) OVER ( + PARTITION BY server_name + ORDER BY timestamp ASC + ) AS baseline_cpu +FROM server_metrics +WHERE DATE(timestamp) = CURRENT_DATE; +``` + +**Comparison with MIN:** +```sql +-- FIRST_VALUE (order-dependent) +SELECT + department, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_hired_salary +FROM emp; + +-- MIN (value-dependent) +SELECT + department, + MIN(salary) AS lowest_salary +FROM emp +GROUP BY department; +``` + +--- + +## Function: LAST_VALUE + +**Description:** +Window function: returns the last value in an ordered partition. Pushed to Elasticsearch by flipping sort order in `top_hits`. + +**Syntax:** +```sql +LAST_VALUE(expr) OVER ( + [PARTITION BY partition_expr, ...] + [ORDER BY order_expr [ASC|DESC], ...] 
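+  -- same shape as FIRST_VALUE; the push-down reverses the sort order
+  -- in the `top_hits` aggregation (see Behavior below)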
+) +``` + +**Inputs:** +- `expr` - Expression to return +- `PARTITION BY` - Optional grouping columns +- `ORDER BY` - Ordering specification (if not provided, only expr column name is used for sorting) + +**Output:** +- Same type as input expression + +**Behavior:** +- Returns the last value based on `ORDER BY` within each partition +- If `OVER` is not provided, only the expr column name is used for sorting +- Optimized to Elasticsearch `top_hits` by reversing sort order + +**Examples:** + +**Basic LAST_VALUE:** +```sql +-- Last salary in each department (ordered by hire date) +SELECT + department, + name, + salary, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_salary +FROM emp; +``` + +**Without PARTITION BY:** +```sql +-- Most recent hire across entire company +SELECT + name, + hire_date, + LAST_VALUE(name) OVER (ORDER BY hire_date ASC) AS last_hired_employee +FROM emp; +``` + +**Without OVER clause:** +```sql +-- Uses expr column name for sorting +SELECT + department, + LAST_VALUE(salary) AS last_salary_value +FROM emp; +``` + +**Multiple Partitions:** +```sql +-- Last employee hired in each department and job title +SELECT + department, + job_title, + name, + hire_date, + LAST_VALUE(name) OVER ( + PARTITION BY department, job_title + ORDER BY hire_date ASC + ) AS last_in_role +FROM emp; +``` + +**Practical Examples:** + +**1. Most recent purchase per customer:** +```sql +SELECT + customer_id, + order_id, + order_date, + total_amount, + LAST_VALUE(total_amount) OVER ( + PARTITION BY customer_id + ORDER BY order_date ASC + ) AS last_order_amount +FROM orders; +``` + +**2. Latest stock price:** +```sql +SELECT + stock_symbol, + trade_date, + closing_price, + LAST_VALUE(closing_price) OVER ( + PARTITION BY stock_symbol + ORDER BY trade_date ASC + ) AS current_price +FROM stock_prices; +``` + +**3. Current status:** +```sql +SELECT + user_id, + status_change_date, + status, + LAST_VALUE(status) OVER ( + PARTITION BY user_id + ORDER BY status_change_date ASC + ) AS current_status +FROM user_status_history; +``` + +**Comparison with MAX:** +```sql +-- LAST_VALUE (order-dependent) +SELECT + department, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_hired_salary +FROM emp; + +-- MAX (value-dependent) +SELECT + department, + MAX(salary) AS highest_salary +FROM emp +GROUP BY department; +``` + +**FIRST_VALUE vs LAST_VALUE:** +```sql +-- Compare first and last values +SELECT + department, + FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS first_hire_salary, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS last_hire_salary, + LAST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) - FIRST_VALUE(salary) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS salary_change +FROM emp; +``` + +--- + +## Function: ARRAY_AGG + +**Description:** +Collect values into an array for each partition. Implemented using `OVER` and pushed to Elasticsearch as `top_hits`. Post-processing converts hits to an array of scalars. + +**Syntax:** +```sql +ARRAY_AGG(expr) OVER ( + [PARTITION BY partition_expr, ...] + [ORDER BY order_expr [ASC|DESC], ...] 
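+  -- collected via a `top_hits` aggregation; post-processing turns the
+  -- hits into an array of scalars (see Behavior below)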
+) +``` + +**Inputs:** +- `expr` - Expression to collect +- `PARTITION BY` - Optional grouping columns +- `ORDER BY` - Optional ordering (if not provided, only expr column name is used for sorting) + +**Output:** +- `ARRAY` - Array of collected values + +**Behavior:** +- Collects all values of `expr` within each partition into an array +- If `OVER` is not provided, only the expr column name is used for sorting +- Optimized to Elasticsearch `top_hits` aggregation +- Post-processing converts hits to array of scalars + +**Examples:** + +**Basic ARRAY_AGG:** +```sql +-- Collect employee names per department +SELECT + department, + ARRAY_AGG(name) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS employees +FROM emp +LIMIT 100; +-- Result: employees as an array of name values per department (sorted and limited) +``` + +**Without PARTITION BY:** +```sql +-- Collect all employee names (ordered) +SELECT + ARRAY_AGG(name) OVER (ORDER BY hire_date ASC) AS all_employees +FROM emp; +``` + +**Without OVER clause:** +```sql +-- Uses expr column name for sorting +SELECT + department, + ARRAY_AGG(name) AS employee_list +FROM emp; +``` + +**With Multiple Columns:** +```sql +-- Collect salaries per department +SELECT + department, + ARRAY_AGG(salary) OVER ( + PARTITION BY department + ORDER BY salary DESC + ) AS salary_list +FROM emp; +``` + +**Ordered Collection:** +```sql +-- Collect products by price (high to low) +SELECT + category, + ARRAY_AGG(product_name) OVER ( + PARTITION BY category + ORDER BY price DESC + ) AS products_by_price +FROM products +LIMIT 100; +``` + +**Practical Examples:** + +**1. Customer order history:** +```sql +SELECT + customer_id, + ARRAY_AGG(order_id) OVER ( + PARTITION BY customer_id + ORDER BY order_date DESC + ) AS order_history, + ARRAY_AGG(total_amount) OVER ( + PARTITION BY customer_id + ORDER BY order_date DESC + ) AS amount_history +FROM orders +LIMIT 1000; +``` + +**2. Product tags:** +```sql +SELECT + product_id, + product_name, + ARRAY_AGG(tag) OVER ( + PARTITION BY product_id + ORDER BY tag ASC + ) AS tags +FROM product_tags +LIMIT 500; +``` + +**3. Timeline of events:** +```sql +SELECT + user_id, + ARRAY_AGG(event_type) OVER ( + PARTITION BY user_id + ORDER BY event_timestamp ASC + ) AS event_timeline, + ARRAY_AGG(event_timestamp) OVER ( + PARTITION BY user_id + ORDER BY event_timestamp ASC + ) AS timestamp_timeline +FROM user_events +WHERE event_timestamp >= DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY) +LIMIT 1000; +``` + +**4. 
Hierarchical data:** +```sql +SELECT + manager_id, + ARRAY_AGG(employee_name) OVER ( + PARTITION BY manager_id + ORDER BY hire_date ASC + ) AS direct_reports +FROM employees +WHERE manager_id IS NOT NULL +LIMIT 100; +``` + +**LIMIT Consideration:** +```sql +-- Always use LIMIT with ARRAY_AGG to prevent memory issues +SELECT + department, + ARRAY_AGG(name) OVER ( + PARTITION BY department + ORDER BY hire_date ASC + ) AS employees +FROM emp +LIMIT 100; -- Important: limits result set size +``` + +**Comparison with STRING_AGG (if available):** +```sql +-- ARRAY_AGG returns array +SELECT + department, + ARRAY_AGG(name) OVER (PARTITION BY department) AS name_array +FROM emp; +-- Result: ['John', 'Jane', 'Bob'] + +-- STRING_AGG returns string (if supported) +SELECT + department, + STRING_AGG(name, ', ') AS name_string +FROM emp +GROUP BY department; +-- Result: 'John, Jane, Bob' +``` + +--- + +## Aggregate Functions Summary + +| Function | Purpose | Input | Output | NULL Handling | +|------------------------|-----------------------|------------|---------------|------------------| +| `COUNT(*)` | Count all rows | Any | `BIGINT` | Includes NULLs | +| `COUNT(expr)` | Count non-NULL values | Any | `BIGINT` | Excludes NULLs | +| `COUNT(DISTINCT expr)` | Count distinct values | Any | `BIGINT` | Excludes NULLs | +| `SUM(expr)` | Sum values | Numeric | Numeric | Ignores NULLs | +| `AVG(expr)` | Average values | Numeric | `DOUBLE` | Ignores NULLs | +| `MIN(expr)` | Minimum value | Comparable | Same as input | Ignores NULLs | +| `MAX(expr)` | Maximum value | Comparable | Same as input | Ignores NULLs | +| `FIRST_VALUE(expr)` | First value (ordered) | Any | Same as input | Depends on ORDER | +| `LAST_VALUE(expr)` | Last value (ordered) | Any | Same as input | Depends on ORDER | +| `ARRAY_AGG(expr)` | Collect into array | Any | `ARRAY` | Includes NULLs | + +[Back to index](README.md) diff --git a/documentation/sql/functions_conditional.md b/documentation/sql/functions_conditional.md new file mode 100644 index 00000000..c2fb2ca3 --- /dev/null +++ b/documentation/sql/functions_conditional.md @@ -0,0 +1,397 @@ +[Back to index](README.md) + +# Conditional Functions + +This page documents conditional expressions. + +--- + +### Function: CASE (searched form) +**Name & Aliases:** `CASE WHEN ... THEN ... ELSE ... END` (searched CASE form) + +**Description:** +Evaluates boolean WHEN expressions in order; returns the result expression corresponding to the first true condition; if none match, returns the ELSE expression (or NULL if ELSE omitted). + +**Syntax:** +```sql +CASE + WHEN condition1 THEN result1 + WHEN condition2 THEN result2 + ... + ELSE default_result +END +``` + +**Inputs:** +- One or more `WHEN condition THEN result` pairs. Optional `ELSE result`. + +**Output:** +- Type coerced from result expressions (THEN/ELSE). + +**Examples:** + +**1. Salary banding:** +```sql +SELECT + name, + salary, + CASE + WHEN salary > 100000 THEN 'very_high' + WHEN salary > 50000 THEN 'high' + ELSE 'normal' + END AS salary_band +FROM emp +``` + +**2. Product status:** +```sql +SELECT + title, + stock, + CASE + WHEN stock = 0 THEN 'Out of Stock' + WHEN stock < 10 THEN 'Low Stock' + WHEN stock < 50 THEN 'In Stock' + ELSE 'Well Stocked' + END AS stock_status +FROM products +``` + +**3. 
Discount calculation:** +```sql +SELECT + title, + price, + CASE + WHEN price > 1000 THEN price * 0.85 -- 15% off + WHEN price > 500 THEN price * 0.90 -- 10% off + WHEN price > 100 THEN price * 0.95 -- 5% off + ELSE price + END AS discounted_price +FROM products +``` + +**4. Without ELSE (returns NULL if no match):** +```sql +SELECT + name, + CASE + WHEN age < 18 THEN 'Minor' + WHEN age < 65 THEN 'Adult' + END AS age_group +FROM persons +-- Returns NULL for age >= 65 +``` + +--- + +### Function: CASE (simple / expression form) +**Name & Aliases:** `CASE expr WHEN val1 THEN r1 WHEN val2 THEN r2 ... ELSE rN END` (simple CASE) + +**Description:** +Compare `expr` to `valN` sequentially using equality; returns corresponding `rN` for first match; else `ELSE` result or NULL. + +**Syntax:** +```sql +CASE expression + WHEN value1 THEN result1 + WHEN value2 THEN result2 + ... + ELSE default_result +END +``` + +**Inputs:** +- `expr` (any comparable type) and pairs `WHEN value THEN result`. + +**Output:** +- Type coerced from result expressions. + +**Implementation notes:** +The simple form evaluates by comparing `expr = value` for each WHEN. +Both CASE forms are parsed and translated into nested conditional Painless scripts for `script_fields` when used outside an aggregation push-down. + +**Examples:** + +**1. Department categorization:** +```sql +SELECT + name, + department, + CASE department + WHEN 'IT' THEN 'tech' + WHEN 'Sales' THEN 'revenue' + WHEN 'Marketing' THEN 'revenue' + WHEN 'Engineering' THEN 'tech' + ELSE 'other' + END AS dept_category +FROM emp +``` + +**2. Status mapping:** +```sql +SELECT + order_id, + CASE status + WHEN 'P' THEN 'Pending' + WHEN 'S' THEN 'Shipped' + WHEN 'D' THEN 'Delivered' + WHEN 'C' THEN 'Cancelled' + ELSE 'Unknown' + END AS status_label +FROM orders +``` + +**3. Priority levels:** +```sql +SELECT + ticket_id, + CASE priority + WHEN 1 THEN 'Critical' + WHEN 2 THEN 'High' + WHEN 3 THEN 'Medium' + WHEN 4 THEN 'Low' + ELSE 'Undefined' + END AS priority_name +FROM tickets +``` + +**4. Numeric to text conversion:** +```sql +SELECT + product_id, + CASE rating + WHEN 5 THEN '★★★★★' + WHEN 4 THEN '★★★★☆' + WHEN 3 THEN '★★★☆☆' + WHEN 2 THEN '★★☆☆☆' + WHEN 1 THEN '★☆☆☆☆' + ELSE 'No rating' + END AS star_display +FROM reviews +``` +--- + +### COALESCE + +Returns the first non-null argument. + +**Syntax:** +```sql +COALESCE(expr1, expr2, ...) +``` + +**Inputs:** +- One or more expressions + +**Output:** +- Value of first non-null expression (coerced to common type) + +**Examples:** + +**1. Display name fallback:** +```sql +SELECT + COALESCE(nickname, firstname, 'N/A') AS display_name +FROM users + +-- If nickname = 'Jo': returns 'Jo' +-- If nickname = NULL, firstname = 'John': returns 'John' +-- If both NULL: returns 'N/A' +``` + +**2. Default values:** +```sql +SELECT + title, + COALESCE(discount_price, price) AS final_price +FROM products +-- Uses discount_price if available, otherwise regular price +``` + +**3. Multiple fallbacks:** +```sql +SELECT + COALESCE(mobile_phone, work_phone, home_phone, 'No phone') AS contact_phone +FROM customers +``` + +**4. Handling missing data:** +```sql +SELECT + name, + COALESCE(email, 'no-email@example.com') AS email, + COALESCE(country, 'Unknown') AS country +FROM users +``` + +--- + +### NULLIF + +Returns NULL if expr1 = expr2; otherwise returns expr1. 
+ +**Syntax:** +```sql +NULLIF(expr1, expr2) +``` + +**Inputs:** +- `expr1` - first expression +- `expr2` - second expression + +**Output:** +- Type of `expr1`, or NULL if equal + +**Examples:** + +**1. Normalize unknown values:** +```sql +SELECT + NULLIF(status, 'unknown') AS status_norm +FROM events + +-- If status = 'unknown': returns NULL +-- If status = 'active': returns 'active' +``` + +**2. Handle sentinel values:** +```sql +SELECT + title, + NULLIF(price, 0) AS valid_price +FROM products +-- Converts 0 prices to NULL +``` + +**3. Avoid division by zero:** +```sql +SELECT + total_sales / NULLIF(total_orders, 0) AS avg_order_value +FROM sales_summary +-- Returns NULL instead of error when total_orders = 0 +``` + +**4. Clean data:** +```sql +SELECT + name, + NULLIF(TRIM(description), '') AS description +FROM products +-- Converts empty strings to NULL after trimming +``` + +--- + +### ISNULL + +Tests if expression is NULL. + +**Syntax:** +```sql +ISNULL(expr) +``` + +**Inputs:** +- `expr` - expression to test + +**Output:** +- `BOOLEAN` - TRUE if NULL, FALSE otherwise + +**Examples:** + +**1. Check for missing manager:** +```sql +SELECT + name, + ISNULL(manager) AS manager_missing +FROM emp + +-- Result: TRUE if manager is NULL, else FALSE +``` + +**2. Filter NULL values:** +```sql +SELECT * +FROM products +WHERE ISNULL(description) +-- Returns products without description +``` + +**3. Count NULLs:** +```sql +SELECT + COUNT(*) as total, + SUM(CASE WHEN ISNULL(email) THEN 1 ELSE 0 END) as missing_emails +FROM users +``` + +**4. Conditional logic:** +```sql +SELECT + name, + CASE + WHEN ISNULL(last_login) THEN 'Never logged in' + ELSE 'Active user' + END AS user_status +FROM users +``` + +--- + +### ISNOTNULL + +Tests if expression is NOT NULL. + +**Syntax:** +```sql +ISNOTNULL(expr) +``` + +**Inputs:** +- `expr` - expression to test + +**Output:** +- `BOOLEAN` - TRUE if NOT NULL, FALSE if NULL + +**Examples:** + +**1. Check for existing manager:** +```sql +SELECT + name, + ISNOTNULL(manager) AS has_manager +FROM emp + +-- Result: TRUE if manager is NOT NULL, else FALSE +``` + +**2. Filter non-NULL values:** +```sql +SELECT * +FROM products +WHERE ISNOTNULL(description) +-- Returns products with description +``` + +**3. Count non-NULLs:** +```sql +SELECT + COUNT(*) as total, + SUM(CASE WHEN ISNOTNULL(email) THEN 1 ELSE 0 END) as with_emails +FROM users +``` + +**4. Required fields validation:** +```sql +SELECT + product_id, + CASE + WHEN ISNOTNULL(title) AND ISNOTNULL(price) AND ISNOTNULL(category) + THEN 'Valid' + ELSE 'Incomplete' + END AS validation_status +FROM products +``` + +[Back to index](README.md) diff --git a/documentation/sql/functions_date_time.md b/documentation/sql/functions_date_time.md new file mode 100644 index 00000000..2fbfe3fe --- /dev/null +++ b/documentation/sql/functions_date_time.md @@ -0,0 +1,930 @@ +[Back to index](README.md) + +# Date / Time / Datetime / Timestamp / Interval Functions + +**Navigation:** [Aggregate functions](functions_aggregate.md) · [Operator Precedence](operator_precedence.md) + +## Date/Time Functions + +This page documents TEMPORAL functions. + +--- + +### Current Date/Time Functions + +#### CURRENT_TIMESTAMP / NOW / CURRENT_DATETIME + +Returns current datetime (ZonedDateTime) in UTC. 
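+
+A common use is filtering documents relative to the current time; a sketch (the `events` index and `created_at` field are hypothetical):
+
+```sql
+-- Documents created in the last 24 hours (hypothetical index/field)
+SELECT *
+FROM events
+WHERE created_at >= DATETIME_SUB(NOW(), INTERVAL 1 DAY);
+```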
+ +**Syntax:** +```sql +CURRENT_TIMESTAMP +NOW() +CURRENT_DATETIME +``` + +**Inputs:** +- None + +**Output:** +- `TIMESTAMP` / `DATETIME` + +**Examples:** +```sql +SELECT CURRENT_TIMESTAMP AS now; +-- Result: 2025-09-26T12:34:56Z + +SELECT NOW() AS current_time; +-- Result: 2025-09-26T12:34:56Z + +SELECT CURRENT_DATETIME AS dt; +-- Result: 2025-09-26T12:34:56Z +``` + +--- + +#### CURRENT_DATE / CURDATE / TODAY + +Returns current date as `DATE`. + +**Syntax:** +```sql +CURRENT_DATE +CURDATE() +TODAY() +``` + +**Inputs:** +- None + +**Output:** +- `DATE` + +**Examples:** +```sql +SELECT CURRENT_DATE AS today; +-- Result: 2025-09-26 + +SELECT CURDATE() AS today; +-- Result: 2025-09-26 + +SELECT TODAY() AS today; +-- Result: 2025-09-26 +``` + +--- + +#### CURRENT_TIME / CURTIME + +Returns current time-of-day. + +**Syntax:** +```sql +CURRENT_TIME +CURTIME() +``` + +**Inputs:** +- None + +**Output:** +- `TIME` + +**Examples:** +```sql +SELECT CURRENT_TIME AS t; +-- Result: 12:34:56 + +SELECT CURTIME() AS current_time; +-- Result: 12:34:56 +``` + +--- + +### Date/Time Arithmetic Functions + +#### INTERVAL + +Literal syntax for time intervals. + +**Syntax:** +```sql +INTERVAL n UNIT +``` + +**Inputs:** +- `n` - `INT` value +- `UNIT` - One of: `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY`, `HOUR`, `MINUTE`, `SECOND`, `MILLISECOND`, `MICROSECOND`, `NANOSECOND` + +**Output:** +- `INTERVAL` + +**Note:** `INTERVAL` is not a standalone type; it can only be used as part of date/datetime arithmetic functions. + +**Examples:** +```sql +-- Used with DATE_ADD +SELECT DATE_ADD('2025-01-10'::DATE, INTERVAL 1 MONTH); +-- Result: 2025-02-10 + +-- Used with DATETIME_SUB +SELECT DATETIME_SUB(NOW(), INTERVAL 7 DAY); +-- Result: 7 days ago + +-- Various intervals +INTERVAL 1 YEAR +INTERVAL 3 MONTH +INTERVAL 7 DAY +INTERVAL 2 HOUR +INTERVAL 30 MINUTE +INTERVAL 45 SECOND +``` + +--- + +#### DATE_ADD / DATEADD + +Adds interval to `DATE`. + +**Syntax:** +```sql +DATE_ADD(date_expr, INTERVAL n UNIT) +DATEADD(date_expr, INTERVAL n UNIT) +``` + +**Inputs:** +- `date_expr` - `DATE` +- `INTERVAL n UNIT` - where `UNIT` is one of: `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY` + +**Output:** +- `DATE` + +**Examples:** +```sql +-- Add 1 month +SELECT DATE_ADD('2025-01-10'::DATE, INTERVAL 1 MONTH) AS next_month; +-- Result: 2025-02-10 + +-- Add 7 days +SELECT DATE_ADD('2025-01-10'::DATE, INTERVAL 7 DAY) AS next_week; +-- Result: 2025-01-17 + +-- Add 1 year +SELECT DATEADD('2025-01-10'::DATE, INTERVAL 1 YEAR) AS next_year; +-- Result: 2026-01-10 + +-- Add 2 weeks +SELECT DATE_ADD('2025-01-10'::DATE, INTERVAL 2 WEEK) AS two_weeks_later; +-- Result: 2025-01-24 + +-- Add 1 quarter +SELECT DATE_ADD('2025-01-10'::DATE, INTERVAL 1 QUARTER) AS next_quarter; +-- Result: 2025-04-10 +``` + +--- + +#### DATE_SUB / DATESUB + +Subtract interval from `DATE`. 
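+
+For example, a rolling 30-day window (the `orders` index and `order_date` field are hypothetical):
+
+```sql
+-- Orders from the last 30 days (hypothetical index/field)
+SELECT *
+FROM orders
+WHERE order_date >= DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY);
+```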
+ +**Syntax:** +```sql +DATE_SUB(date_expr, INTERVAL n UNIT) +DATESUB(date_expr, INTERVAL n UNIT) +``` + +**Inputs:** +- `date_expr` - `DATE` +- `INTERVAL n UNIT` - where `UNIT` is one of: `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY` + +**Output:** +- `DATE` + +**Examples:** +```sql +-- Subtract 7 days +SELECT DATE_SUB('2025-01-10'::DATE, INTERVAL 7 DAY) AS week_before; +-- Result: 2025-01-03 + +-- Subtract 1 month +SELECT DATE_SUB('2025-01-10'::DATE, INTERVAL 1 MONTH) AS last_month; +-- Result: 2024-12-10 + +-- Subtract 1 year +SELECT DATESUB('2025-01-10'::DATE, INTERVAL 1 YEAR) AS last_year; +-- Result: 2024-01-10 + +-- Subtract 2 weeks +SELECT DATE_SUB('2025-01-10'::DATE, INTERVAL 2 WEEK) AS two_weeks_ago; +-- Result: 2024-12-27 + +-- Subtract 1 quarter +SELECT DATE_SUB('2025-01-10'::DATE, INTERVAL 1 QUARTER) AS last_quarter; +-- Result: 2024-10-10 +``` + +--- + +#### DATETIME_ADD / DATETIMEADD + +Adds interval to `DATETIME` / `TIMESTAMP`. + +**Syntax:** +```sql +DATETIME_ADD(datetime_expr, INTERVAL n UNIT) +DATETIMEADD(datetime_expr, INTERVAL n UNIT) +``` + +**Inputs:** +- `datetime_expr` - `DATETIME` or `TIMESTAMP` +- `INTERVAL n UNIT` - where `UNIT` is one of: `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY`, `HOUR`, `MINUTE`, `SECOND` + +**Output:** +- `DATETIME` + +**Examples:** +```sql +-- Add 1 day +SELECT DATETIME_ADD('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 1 DAY) AS tomorrow; +-- Result: 2025-01-11T12:00:00Z + +-- Add 2 hours +SELECT DATETIME_ADD('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 2 HOUR) AS later; +-- Result: 2025-01-10T14:00:00Z + +-- Add 30 minutes +SELECT DATETIMEADD('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 30 MINUTE) AS half_hour_later; +-- Result: 2025-01-10T12:30:00Z + +-- Add 45 seconds +SELECT DATETIME_ADD('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 45 SECOND) AS seconds_later; +-- Result: 2025-01-10T12:00:45Z + +-- Add 1 month +SELECT DATETIME_ADD('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 1 MONTH) AS next_month; +-- Result: 2025-02-10T12:00:00Z + +-- Add 1 year +SELECT DATETIME_ADD('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 1 YEAR) AS next_year; +-- Result: 2026-01-10T12:00:00Z +``` + +--- + +#### DATETIME_SUB / DATETIMESUB + +Subtract interval from `DATETIME` / `TIMESTAMP`. + +**Syntax:** +```sql +DATETIME_SUB(datetime_expr, INTERVAL n UNIT) +DATETIMESUB(datetime_expr, INTERVAL n UNIT) +``` + +**Inputs:** +- `datetime_expr` - `DATETIME` or `TIMESTAMP` +- `INTERVAL n UNIT` - where `UNIT` is one of: `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY`, `HOUR`, `MINUTE`, `SECOND` + +**Output:** +- `DATETIME` + +**Examples:** +```sql +-- Subtract 2 hours +SELECT DATETIME_SUB('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 2 HOUR) AS earlier; +-- Result: 2025-01-10T10:00:00Z + +-- Subtract 1 day +SELECT DATETIME_SUB('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 1 DAY) AS yesterday; +-- Result: 2025-01-09T12:00:00Z + +-- Subtract 30 minutes +SELECT DATETIMESUB('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 30 MINUTE) AS half_hour_ago; +-- Result: 2025-01-10T11:30:00Z + +-- Subtract 7 days +SELECT DATETIME_SUB('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 7 DAY) AS week_ago; +-- Result: 2025-01-03T12:00:00Z + +-- Subtract 1 month +SELECT DATETIME_SUB('2025-01-10T12:00:00Z'::TIMESTAMP, INTERVAL 1 MONTH) AS last_month; +-- Result: 2024-12-10T12:00:00Z +``` + +--- + +### Date/Time Difference Functions + +#### DATEDIFF / DATE_DIFF + +Difference between 2 dates (date1 - date2) in the specified time unit. 
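+
+Because the result follows the `date1 - date2` convention, swapping the operands flips the sign:
+
+```sql
+-- Operand order determines the sign
+SELECT DATEDIFF('2025-01-01'::DATE, '2025-01-10'::DATE) AS diff;
+-- Result: -9
+```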
+ +**Syntax:** +```sql +DATEDIFF(date1, date2) +DATEDIFF(date1, date2, unit) +DATE_DIFF(date1, date2) +DATE_DIFF(date1, date2, unit) +``` + +**Inputs:** +- `date1` - `DATE` or `DATETIME` +- `date2` - `DATE` or `DATETIME` +- `unit` (optional) - One of: `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY`, `HOUR`, `MINUTE`, `SECOND` + - Default: `DAY` + +**Output:** +- `BIGINT` + +**Examples:** +```sql +-- Difference in days (default) +SELECT DATEDIFF('2025-01-10'::DATE, '2025-01-01'::DATE) AS diff; +-- Result: 9 + +-- Difference in days (explicit) +SELECT DATEDIFF('2025-01-10'::DATE, '2025-01-01'::DATE, DAY) AS diff_days; +-- Result: 9 + +-- Difference in weeks +SELECT DATE_DIFF('2025-01-31'::DATE, '2025-01-01'::DATE, WEEK) AS diff_weeks; +-- Result: 4 + +-- Difference in months +SELECT DATEDIFF('2025-06-01'::DATE, '2025-01-01'::DATE, MONTH) AS diff_months; +-- Result: 5 + +-- Difference in years +SELECT DATEDIFF('2027-01-01'::DATE, '2025-01-01'::DATE, YEAR) AS diff_years; +-- Result: 2 + +-- Difference in hours (with timestamps) +SELECT DATEDIFF('2025-01-10T14:00:00Z'::TIMESTAMP, '2025-01-10T12:00:00Z'::TIMESTAMP, HOUR) AS diff_hours; +-- Result: 2 + +-- Difference in minutes +SELECT DATEDIFF('2025-01-10T12:30:00Z'::TIMESTAMP, '2025-01-10T12:00:00Z'::TIMESTAMP, MINUTE) AS diff_minutes; +-- Result: 30 + +-- Difference in seconds +SELECT DATEDIFF('2025-01-10T12:00:45Z'::TIMESTAMP, '2025-01-10T12:00:00Z'::TIMESTAMP, SECOND) AS diff_seconds; +-- Result: 45 +``` + +--- + +### Date/Time Formatting Functions + +#### DATE_FORMAT + +Format `DATE` to `VARCHAR`. + +**Syntax:** +```sql +DATE_FORMAT(date_expr, pattern) +``` + +**Inputs:** +- `date_expr` - `DATE` +- `pattern` - `VARCHAR` (MySQL-style pattern) + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Simple date formatting +SELECT DATE_FORMAT('2025-01-10'::DATE, '%Y-%m-%d') AS fmt; +-- Result: '2025-01-10' + +-- Day of the week (full name) +SELECT DATE_FORMAT('2025-01-10'::DATE, '%W') AS weekday; +-- Result: 'Friday' + +-- Month name (full) +SELECT DATE_FORMAT('2025-01-10'::DATE, '%M') AS month_name; +-- Result: 'January' + +-- Custom format +SELECT DATE_FORMAT('2025-01-10'::DATE, '%W, %M %d, %Y') AS formatted; +-- Result: 'Friday, January 10, 2025' + +-- Short format +SELECT DATE_FORMAT('2025-01-10'::DATE, '%m/%d/%y') AS short_date; +-- Result: '01/10/25' + +-- Day of month without leading zero +SELECT DATE_FORMAT('2025-01-09'::DATE, '%e') AS day; +-- Result: '9' + +-- Abbreviated weekday and month +SELECT DATE_FORMAT('2025-01-10'::DATE, '%a, %b %d') AS abbrev; +-- Result: 'Fri, Jan 10' +``` + +--- + +#### DATE_PARSE + +Parse `VARCHAR` into `DATE`. + +**Syntax:** +```sql +DATE_PARSE(string, pattern) +``` + +**Inputs:** +- `string` - `VARCHAR` +- `pattern` - `VARCHAR` (MySQL-style pattern) + +**Output:** +- `DATE` + +**Examples:** +```sql +-- Parse ISO-style date +SELECT DATE_PARSE('2025-01-10', '%Y-%m-%d') AS d; +-- Result: 2025-01-10 + +-- Parse with day of week +SELECT DATE_PARSE('Friday 2025-01-10', '%W %Y-%m-%d') AS d; +-- Result: 2025-01-10 + +-- Parse US format +SELECT DATE_PARSE('01/10/2025', '%m/%d/%Y') AS d; +-- Result: 2025-01-10 + +-- Parse with month name +SELECT DATE_PARSE('January 10, 2025', '%M %d, %Y') AS d; +-- Result: 2025-01-10 + +-- Parse with abbreviated month +SELECT DATE_PARSE('Jan 10, 2025', '%b %d, %Y') AS d; +-- Result: 2025-01-10 + +-- Parse 2-digit year +SELECT DATE_PARSE('10/01/25', '%d/%m/%y') AS d; +-- Result: 2025-01-10 +``` + +--- + +#### DATETIME_FORMAT + +Format `DATETIME` / `TIMESTAMP` to `VARCHAR` with pattern. 
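+
+`DATETIME_FORMAT` is the inverse of `DATETIME_PARSE` (documented below); formatting and re-parsing with the same pattern should round-trip a value:
+
+```sql
+-- Round-trip sketch: format, then parse back with the same pattern
+SELECT DATETIME_PARSE(
+  DATETIME_FORMAT('2025-01-10T13:45:30Z'::TIMESTAMP, '%Y-%m-%d %H:%i:%s'),
+  '%Y-%m-%d %H:%i:%s'
+) AS round_trip;
+-- Result: 2025-01-10T13:45:30Z
+```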
+ +**Syntax:** +```sql +DATETIME_FORMAT(datetime_expr, pattern) +``` + +**Inputs:** +- `datetime_expr` - `DATETIME` or `TIMESTAMP` +- `pattern` - `VARCHAR` (MySQL-style pattern) + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Format with seconds and microseconds +SELECT DATETIME_FORMAT('2025-01-10T12:00:00.123456Z'::TIMESTAMP, '%Y-%m-%d %H:%i:%s.%f') AS s; +-- Result: '2025-01-10 12:00:00.123456' + +-- Format 12-hour clock with AM/PM +SELECT DATETIME_FORMAT('2025-01-10T13:45:30Z'::TIMESTAMP, '%Y-%m-%d %h:%i:%s %p') AS s; +-- Result: '2025-01-10 01:45:30 PM' + +-- Format with full weekday name +SELECT DATETIME_FORMAT('2025-01-10T13:45:30Z'::TIMESTAMP, '%W, %Y-%m-%d') AS s; +-- Result: 'Friday, 2025-01-10' + +-- Full datetime with day name and month name +SELECT DATETIME_FORMAT('2025-01-10T13:45:30Z'::TIMESTAMP, '%W, %M %d, %Y at %h:%i %p') AS s; +-- Result: 'Friday, January 10, 2025 at 01:45 PM' + +-- ISO 8601 format +SELECT DATETIME_FORMAT('2025-01-10T13:45:30Z'::TIMESTAMP, '%Y-%m-%dT%H:%i:%s') AS s; +-- Result: '2025-01-10T13:45:30' + +-- 24-hour format +SELECT DATETIME_FORMAT('2025-01-10T13:45:30Z'::TIMESTAMP, '%H:%i:%s') AS s; +-- Result: '13:45:30' + +-- 12-hour format +SELECT DATETIME_FORMAT('2025-01-10T13:45:30Z'::TIMESTAMP, '%h:%i:%s %p') AS s; +-- Result: '01:45:30 PM' +``` + +--- + +#### DATETIME_PARSE + +Parse `VARCHAR` into `DATETIME` / `TIMESTAMP`. + +**Syntax:** +```sql +DATETIME_PARSE(string, pattern) +``` + +**Inputs:** +- `string` - `VARCHAR` +- `pattern` - `VARCHAR` (MySQL-style pattern) + +**Output:** +- `DATETIME` + +**Examples:** +```sql +-- Parse full datetime with microseconds +SELECT DATETIME_PARSE('2025-01-10 12:00:00.123456', '%Y-%m-%d %H:%i:%s.%f') AS dt; +-- Result: 2025-01-10T12:00:00.123456Z + +-- Parse 12-hour clock with AM/PM +SELECT DATETIME_PARSE('2025-01-10 01:45:30 PM', '%Y-%m-%d %h:%i:%s %p') AS dt; +-- Result: 2025-01-10T13:45:30Z + +-- Parse ISO 8601 format +SELECT DATETIME_PARSE('2025-01-10T13:45:30', '%Y-%m-%dT%H:%i:%s') AS dt; +-- Result: 2025-01-10T13:45:30Z + +-- Parse with full day and month names +SELECT DATETIME_PARSE('Friday, January 10, 2025 at 01:45 PM', '%W, %M %d, %Y at %h:%i %p') AS dt; +-- Result: 2025-01-10T13:45:00Z + +-- Parse 24-hour format +SELECT DATETIME_PARSE('2025-01-10 13:45:30', '%Y-%m-%d %H:%i:%s') AS dt; +-- Result: 2025-01-10T13:45:30Z + +-- Parse abbreviated names +SELECT DATETIME_PARSE('Fri, Jan 10, 2025 1:45 PM', '%a, %b %d, %Y %h:%i %p') AS dt; +-- Result: 2025-01-10T13:45:00Z +``` + +--- + +### Date/Time Truncation Function + +#### DATE_TRUNC + +Truncate date/datetime to a `unit`. 
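+
+Truncation is typically combined with `GROUP BY` for time bucketing; a sketch (the `orders` index and its fields are hypothetical):
+
+```sql
+-- Monthly order counts (hypothetical index/fields)
+SELECT
+  DATE_TRUNC(order_date, MONTH) AS month,
+  COUNT(*) AS order_count
+FROM orders
+GROUP BY month
+ORDER BY month;
+```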
+ +**Syntax:** +```sql +DATE_TRUNC(date_or_datetime_expr, unit) +``` + +**Inputs:** +- `date_or_datetime_expr` - `DATE` or `DATETIME` +- `unit` - One of: `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY`, `HOUR`, `MINUTE`, `SECOND` + +**Output:** +- `DATE` or `DATETIME` (same type as input) + +**Examples:** +```sql +-- Truncate to start of month +SELECT DATE_TRUNC('2025-01-15'::DATE, MONTH) AS start_month; +-- Result: 2025-01-01 + +-- Truncate to start of year +SELECT DATE_TRUNC('2025-06-15'::DATE, YEAR) AS start_year; +-- Result: 2025-01-01 + +-- Truncate to start of quarter +SELECT DATE_TRUNC('2025-05-15'::DATE, QUARTER) AS start_quarter; +-- Result: 2025-04-01 + +-- Truncate to start of week +SELECT DATE_TRUNC('2025-01-15'::DATE, WEEK) AS start_week; +-- Result: 2025-01-13 (Monday) + +-- Truncate datetime to hour +SELECT DATE_TRUNC('2025-01-10T12:34:56Z'::TIMESTAMP, HOUR) AS start_hour; +-- Result: 2025-01-10T12:00:00Z + +-- Truncate datetime to minute +SELECT DATE_TRUNC('2025-01-10T12:34:56Z'::TIMESTAMP, MINUTE) AS start_minute; +-- Result: 2025-01-10T12:34:00Z + +-- Truncate datetime to day +SELECT DATE_TRUNC('2025-01-10T12:34:56Z'::TIMESTAMP, DAY) AS start_day; +-- Result: 2025-01-10T00:00:00Z +``` + +--- + +### Date/Time Extraction Functions + +#### EXTRACT + +Extract field from date or datetime. + +**Syntax:** +```sql +EXTRACT(unit FROM date_expr) +``` + +**Inputs:** +- `unit` - One of: `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY`, `HOUR`, `MINUTE`, `SECOND` +- `date_expr` - `DATE` or `DATETIME` + +**Output:** +- `INT` / `BIGINT` + +**Examples:** +```sql +-- Extract year +SELECT EXTRACT(YEAR FROM '2025-01-10T12:00:00Z'::TIMESTAMP) AS y; +-- Result: 2025 + +-- Extract month +SELECT EXTRACT(MONTH FROM '2025-01-10T12:00:00Z'::TIMESTAMP) AS m; +-- Result: 1 + +-- Extract day +SELECT EXTRACT(DAY FROM '2025-01-10T12:00:00Z'::TIMESTAMP) AS d; +-- Result: 10 + +-- Extract hour +SELECT EXTRACT(HOUR FROM '2025-01-10T12:34:56Z'::TIMESTAMP) AS h; +-- Result: 12 + +-- Extract minute +SELECT EXTRACT(MINUTE FROM '2025-01-10T12:34:56Z'::TIMESTAMP) AS min; +-- Result: 34 + +-- Extract second +SELECT EXTRACT(SECOND FROM '2025-01-10T12:34:56Z'::TIMESTAMP) AS sec; +-- Result: 56 + +-- Extract quarter +SELECT EXTRACT(QUARTER FROM '2025-05-10'::DATE) AS q; +-- Result: 2 + +-- Extract week +SELECT EXTRACT(WEEK FROM '2025-01-15'::DATE) AS w; +-- Result: 3 +``` + +--- + +#### Individual Extraction Functions + +**YEAR / QUARTER / MONTH / WEEK / DAY** + +**Syntax:** +```sql +YEAR(date_expr) +QUARTER(date_expr) +MONTH(date_expr) +WEEK(date_expr) +DAY(date_expr) +``` + +**Examples:** +```sql +-- Extract year +SELECT YEAR('2025-01-10'::DATE) AS year; +-- Result: 2025 + +-- Extract quarter (1-4) +SELECT QUARTER('2025-05-10'::DATE) AS q; +-- Result: 2 + +-- Extract month (1-12) +SELECT MONTH('2025-01-10'::DATE) AS month; +-- Result: 1 + +-- Extract ISO week number (1-53) +SELECT WEEK('2025-01-01'::DATE) AS w; +-- Result: 1 + +-- Extract day of month (1-31) +SELECT DAY('2025-01-10'::DATE) AS day; +-- Result: 10 +``` + +**HOUR / MINUTE / SECOND** + +**Syntax:** +```sql +HOUR(timestamp) +MINUTE(timestamp) +SECOND(timestamp) +``` + +**Examples:** +```sql +-- Extract hour (0-23) +SELECT HOUR('2025-01-10T12:34:56Z'::TIMESTAMP) AS hour; +-- Result: 12 + +-- Extract minute (0-59) +SELECT MINUTE('2025-01-10T12:34:56Z'::TIMESTAMP) AS minute; +-- Result: 34 + +-- Extract second (0-59) +SELECT SECOND('2025-01-10T12:34:56Z'::TIMESTAMP) AS second; +-- Result: 56 +``` + +--- + +#### Sub-Second Extraction Functions + +**NANOSECOND / 
MICROSECOND / MILLISECOND**
+
+Sub-second extraction from timestamps.
+
+**Syntax:**
+```sql
+NANOSECOND(datetime_expr)
+MICROSECOND(datetime_expr)
+MILLISECOND(datetime_expr)
+```
+
+**Inputs:**
+- `datetime_expr` - `DATETIME` or `TIMESTAMP`
+
+**Output:**
+- `INT`
+
+**Examples:**
+```sql
+-- Extract milliseconds
+SELECT MILLISECOND('2025-01-01T12:00:00.123Z'::TIMESTAMP) AS ms;
+-- Result: 123
+
+-- Extract microseconds
+SELECT MICROSECOND('2025-01-01T12:00:00.123456Z'::TIMESTAMP) AS us;
+-- Result: 123456
+
+-- Extract nanoseconds
+SELECT NANOSECOND('2025-01-01T12:00:00.123456789Z'::TIMESTAMP) AS ns;
+-- Result: 123456789
+```
+
+---
+
+### Special Date/Time Functions
+
+#### LAST_DAY
+
+Last day of month for a date.
+
+**Syntax:**
+```sql
+LAST_DAY(date_expr)
+```
+
+**Inputs:**
+- `date_expr` - `DATE`
+
+**Output:**
+- `DATE`
+
+**Examples:**
+```sql
+-- Last day of February (non-leap year)
+SELECT LAST_DAY('2025-02-15'::DATE) AS ld;
+-- Result: 2025-02-28
+
+-- Last day of February (leap year)
+SELECT LAST_DAY('2024-02-15'::DATE) AS ld;
+-- Result: 2024-02-29
+
+-- Last day of January
+SELECT LAST_DAY('2025-01-10'::DATE) AS ld;
+-- Result: 2025-01-31
+
+-- Last day of current month
+SELECT LAST_DAY(CURRENT_DATE) AS month_end;
+```
+
+---
+
+#### EPOCHDAY
+
+Days since epoch (1970-01-01).
+
+**Syntax:**
+```sql
+EPOCHDAY(date_expr)
+```
+
+**Inputs:**
+- `date_expr` - `DATE`
+
+**Output:**
+- `BIGINT`
+
+**Examples:**
+```sql
+-- Day after epoch
+SELECT EPOCHDAY('1970-01-02'::DATE) AS d;
+-- Result: 1
+
+-- Epoch day
+SELECT EPOCHDAY('1970-01-01'::DATE) AS d;
+-- Result: 0
+
+-- Days since epoch for a recent date
+SELECT EPOCHDAY('2025-01-10'::DATE) AS d;
+-- Result: 20098
+```
+
+---
+
+#### OFFSET_SECONDS
+
+Timezone offset in seconds.
+
+**Syntax:**
+```sql
+OFFSET_SECONDS(timestamp_expr)
+```
+
+**Inputs:**
+- `timestamp_expr` - `TIMESTAMP` with timezone
+
+**Output:**
+- `INT`
+
+**Examples:**
+```sql
+-- UTC+2 (7200 seconds = 2 hours)
+SELECT OFFSET_SECONDS('2025-01-01T12:00:00+02:00'::TIMESTAMP) AS off;
+-- Result: 7200
+
+-- UTC (0 seconds)
+SELECT OFFSET_SECONDS('2025-01-01T12:00:00Z'::TIMESTAMP) AS off;
+-- Result: 0
+
+-- UTC-5 (-18000 seconds = -5 hours)
+SELECT OFFSET_SECONDS('2025-01-01T12:00:00-05:00'::TIMESTAMP) AS off;
+-- Result: -18000
+```
+
+---
+
+### Supported MySQL-style Date/Time Patterns
+
+The following patterns are supported in `DATE_FORMAT`, `DATE_PARSE`, `DATETIME_FORMAT`, and `DATETIME_PARSE` functions:
+
+| Pattern  | Description                          | Example Output  |
+|----------|--------------------------------------|-----------------|
+| `%Y`     | Year (4 digits)                      | `2025`          |
+| `%y`     | Year (2 digits)                      | `25`            |
+| `%m`     | Month (2 digits, 01-12)              | `01`            |
+| `%c`     | Month (1-12, no leading zero)        | `1`             |
+| `%M`     | Month name (full)                    | `January`       |
+| `%b`     | Month name (abbreviated)             | `Jan`           |
+| `%d`     | Day of month (2 digits, 01-31)       | `10`            |
+| `%e`     | Day of month (1-31, no leading zero) | `9`             |
+| `%W`     | Weekday name (full)                  | `Friday`        |
+| `%a`     | Weekday name (abbreviated)           | `Fri`           |
+| `%H`     | Hour (00-23, 24-hour format)         | `13`            |
+| `%h`     | Hour (01-12, 12-hour format)         | `01`            |
+| `%I`     | Hour (01-12, synonym for %h)         | `01`            |
+| `%i`     | Minutes (00-59)                      | `45`            |
+| `%s`     | Seconds (00-59)                      | `30`            |
+| `%f`     | Microseconds (000000-999999)         | `123456`        |
+| `%p`     | AM/PM marker                         | `AM` / `PM`     |
+
+**Pattern Combination Examples:**
+
+```sql
+-- Full date and time
+'%Y-%m-%d %H:%i:%s' -- 2025-01-10 13:45:30
+
+-- US format with 12-hour time
+'%m/%d/%Y %h:%i %p' -- 01/10/2025 01:45 PM
+
+-- Long format with names
+'%W, %M %d, %Y' -- Friday, January 10, 2025
+
+-- ISO 8601 with microseconds
+'%Y-%m-%dT%H:%i:%s.%f' -- 2025-01-10T13:45:30.123456
+
+-- Short format
+'%d-%b-%y' -- 10-Jan-25
+
+-- Time only (24-hour)
+'%H:%i:%s' -- 13:45:30
+
+-- Time only (12-hour)
+'%h:%i:%s %p' -- 01:45:30 PM
+
+-- European format
+'%d/%m/%Y' -- 10/01/2025
+
+-- Year and month
+'%Y-%m' -- 2025-01
+
+-- Month and day with names
+'%b %d' -- Jan 10
+```
+
+[Back to index](README.md)
diff --git a/documentation/sql/functions_geo.md b/documentation/sql/functions_geo.md
new file mode 100644
index 00000000..1a6ef262
--- /dev/null
+++ b/documentation/sql/functions_geo.md
@@ -0,0 +1,652 @@
+[Back to index](README.md)
+
+## Geo Functions
+
+---
+
+### ST_DISTANCE / DISTANCE
+
+Computes the geodesic distance (great-circle distance) between two points.
+
+**Syntax:**
+```sql
+ST_DISTANCE(point1, point2)
+DISTANCE(point1, point2) -- Alias
+```
+
+**Inputs:**
+
+Each point can be:
+- A column of type `geo_point` in Elasticsearch
+- A literal defined with `POINT(latitude, longitude)`
+
+**Output:**
+- Distance value (numeric) that can be compared with distance literals
+
+**Distance Literals:**
+
+When comparing distances, specify the unit directly after the numeric value:
+
+```sql
+value km  -- Kilometers
+value m   -- Meters (default if no unit)
+value cm  -- Centimeters
+value mm  -- Millimeters
+value mi  -- Miles
+value yd  -- Yards
+value ft  -- Feet
+value in  -- Inches
+value nmi -- Nautical miles
+```
+
+**Performance Note:**
+
+If both arguments are fixed points, the distance is **precomputed at query compilation time**.
+
+---
+
+### Supported Distance Units
+
+| Category     | Unit           | Syntax  | Example              |
+|--------------|----------------|---------|----------------------|
+| **Metric**   | Kilometers     | `km`    | `5000 km`, `10.5 km` |
+|              | Meters         | `m`     | `500 m`, `1000 m`    |
+|              | Centimeters    | `cm`    | `100 cm`, `50 cm`    |
+|              | Millimeters    | `mm`    | `1000 mm`, `500 mm`  |
+| **Imperial** | Miles          | `mi`    | `10 mi`, `5.5 mi`    |
+|              | Yards          | `yd`    | `100 yd`, `50 yd`    |
+|              | Feet           | `ft`    | `500 ft`, `100 ft`   |
+|              | Inches         | `in`    | `100 in`, `50 in`    |
+| **Nautical** | Nautical Miles | `nmi`   | `50 nmi`, `10 nmi`   |
+
+---
+
+### Examples
+
+**1. Basic distance comparison (kilometers):**
+```sql
+SELECT
+  name,
+  ST_DISTANCE(POINT(48.8566, 2.3522), location) AS distance
+FROM stores
+WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) < 5 km
+ORDER BY distance ASC
+```
+
+**2. Distance range with BETWEEN (kilometers):**
+```sql
+SELECT
+  name,
+  address,
+  ST_DISTANCE(POINT(48.8566, 2.3522), location) AS distance
+FROM stores
+WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) BETWEEN 4 km AND 10 km
+ORDER BY distance ASC
+```
+
+**3. Multiple distance comparisons with different units:**
+```sql
+SELECT
+  ST_DISTANCE(POINT(-70.0, 40.0), toLocation) AS d1,
+  ST_DISTANCE(fromLocation, POINT(-70.0, 40.0)) AS d2,
+  ST_DISTANCE(POINT(-70.0, 40.0), POINT(0.0, 0.0)) AS d3
+FROM routes
+WHERE ST_DISTANCE(POINT(-70.0, 40.0), toLocation) BETWEEN 4000 km AND 5000 km
+  AND ST_DISTANCE(fromLocation, toLocation) < 2000 km
+  AND ST_DISTANCE(POINT(-70.0, 40.0), POINT(-70.0, 40.0)) < 1000 km
+```
+
+**4. Distance in miles:**
+```sql
+SELECT
+  name,
+  cuisine,
+  rating,
+  ST_DISTANCE(POINT(40.7128, -74.0060), location) AS distance
+FROM restaurants
+WHERE ST_DISTANCE(POINT(40.7128, -74.0060), location) <= 2 mi
+  AND rating >= 4.0
+ORDER BY rating DESC, distance ASC
+LIMIT 20
+```
+
+**5. 
Distance in meters:** +```sql +SELECT + building_name, + ST_DISTANCE(POINT(48.8566, 2.3522), entrance_location) AS distance +FROM buildings +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), entrance_location) < 500 m +ORDER BY distance ASC +``` + +**6. Distance in feet:** +```sql +SELECT + landmark_name, + ST_DISTANCE(POINT(40.7128, -74.0060), location) AS distance +FROM landmarks +WHERE ST_DISTANCE(POINT(40.7128, -74.0060), location) <= 1000 ft +ORDER BY distance ASC +``` + +**7. Distance in nautical miles:** +```sql +SELECT + ship_name, + DISTANCE(port_location, ship_location) AS distance +FROM vessels +WHERE DISTANCE(port_location, ship_location) < 50 nmi +ORDER BY distance ASC +``` + +**8. Distance between two fields:** +```sql +SELECT + route_id, + origin_name, + destination_name, + ST_DISTANCE(origin_location, destination_location) AS distance_km +FROM routes +WHERE ST_DISTANCE(origin_location, destination_location) > 100 km + AND ST_DISTANCE(origin_location, destination_location) < 500 km +ORDER BY distance_km DESC +``` + +**9. Distance between two fixed points (precomputed):** +```sql +-- Paris to New York +SELECT ST_DISTANCE( + POINT(48.8566, 2.3522), -- Paris + POINT(40.7128, -74.0060) -- New York +) AS distance +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), POINT(40.7128, -74.0060)) > 5000 km +-- Distance is precomputed at query compilation time +``` + +**10. Multiple distance conditions:** +```sql +SELECT + name, + category, + ST_DISTANCE(POINT(51.5074, -0.1278), location) AS distance +FROM points_of_interest +WHERE ST_DISTANCE(POINT(51.5074, -0.1278), location) >= 1 km + AND ST_DISTANCE(POINT(51.5074, -0.1278), location) <= 10 km + AND category IN ('restaurant', 'cafe', 'bar') +ORDER BY distance ASC +``` + +**11. Using DISTANCE alias:** +```sql +SELECT + name, + DISTANCE(POINT(48.8566, 2.3522), location) AS dist +FROM hotels +WHERE DISTANCE(POINT(48.8566, 2.3522), location) < 3 km +ORDER BY dist ASC +``` + +**12. Complex distance query with multiple units:** +```sql +SELECT + store_id, + name, + ST_DISTANCE(warehouse_location, store_location) AS warehouse_dist, + ST_DISTANCE(POINT(48.8566, 2.3522), store_location) AS city_center_dist +FROM stores +WHERE ST_DISTANCE(warehouse_location, store_location) < 50 km + AND ST_DISTANCE(POINT(48.8566, 2.3522), store_location) BETWEEN 5 km AND 20 km +ORDER BY city_center_dist ASC +``` + +**13. Delivery zone classification:** +```sql +SELECT + order_id, + customer_address, + ST_DISTANCE(warehouse_location, delivery_location) AS distance, + CASE + WHEN ST_DISTANCE(warehouse_location, delivery_location) <= 5 km THEN 'Zone 1 - Free' + WHEN ST_DISTANCE(warehouse_location, delivery_location) <= 15 km THEN 'Zone 2 - Standard' + WHEN ST_DISTANCE(warehouse_location, delivery_location) <= 30 km THEN 'Zone 3 - Extended' + ELSE 'Zone 4 - Premium' + END AS delivery_zone +FROM orders +WHERE order_date >= CURRENT_DATE + AND ST_DISTANCE(warehouse_location, delivery_location) < 50 km +ORDER BY distance ASC +``` + +**14. 
Geofencing with meters:** +```sql +SELECT + device_id, + user_name, + timestamp, + ST_DISTANCE(POINT(48.8566, 2.3522), current_location) AS distance, + CASE + WHEN ST_DISTANCE(POINT(48.8566, 2.3522), current_location) <= 50 m THEN 'Inside - Core' + WHEN ST_DISTANCE(POINT(48.8566, 2.3522), current_location) <= 100 m THEN 'Inside - Buffer' + WHEN ST_DISTANCE(POINT(48.8566, 2.3522), current_location) <= 500 m THEN 'Nearby' + ELSE 'Outside' + END AS geofence_status +FROM device_locations +WHERE timestamp >= NOW() - INTERVAL 15 MINUTE + AND ST_DISTANCE(POINT(48.8566, 2.3522), current_location) < 1000 m +ORDER BY timestamp DESC +``` + +**15. Maritime routes (nautical miles):** +```sql +SELECT + route_id, + origin_port, + destination_port, + ST_DISTANCE(origin_location, destination_location) AS distance_nmi +FROM shipping_routes +WHERE ST_DISTANCE(origin_location, destination_location) BETWEEN 100 nmi AND 500 nmi + AND active = true +ORDER BY distance_nmi ASC +``` + +**16. Aggregation with distance filtering:** +```sql +SELECT + category, + COUNT(*) as store_count, + AVG(ST_DISTANCE(POINT(48.8566, 2.3522), location)) as avg_distance +FROM stores +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) < 20 km +GROUP BY category +HAVING COUNT(*) > 5 +ORDER BY avg_distance ASC +``` + +**17. Nearest locations with limit:** +```sql +SELECT + airport_code, + airport_name, + city, + ST_DISTANCE(POINT(48.8566, 2.3522), location) AS distance +FROM airports +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) < 200 km +ORDER BY distance ASC +LIMIT 5 +``` + +**18. Distance comparison between multiple points:** +```sql +SELECT + location_id, + name, + ST_DISTANCE(POINT(48.8566, 2.3522), location) AS dist_from_paris, + ST_DISTANCE(POINT(51.5074, -0.1278), location) AS dist_from_london, + ST_DISTANCE(POINT(52.5200, 13.4050), location) AS dist_from_berlin +FROM european_offices +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) < 500 km + OR ST_DISTANCE(POINT(51.5074, -0.1278), location) < 500 km + OR ST_DISTANCE(POINT(52.5200, 13.4050), location) < 500 km +ORDER BY dist_from_paris ASC +``` + +--- + +### POINT + +Creates a geo-point from latitude and longitude. + +**Syntax:** +```sql +POINT(latitude, longitude) +``` + +**Inputs:** +- `latitude` - DOUBLE (-90 to 90) + - Positive values: North + - Negative values: South +- `longitude` - DOUBLE (-180 to 180) + - Positive values: East + - Negative values: West + +**Output:** +- `geo_point` type + +**Examples:** + +**1. Create a point:** +```sql +SELECT POINT(48.8566, 2.3522) as paris_location +-- Paris: 48.8566°N, 2.3522°E +``` + +**2. Major world cities:** +```sql +-- Paris, France +SELECT POINT(48.8566, 2.3522) as paris + +-- London, UK +SELECT POINT(51.5074, -0.1278) as london + +-- New York, USA +SELECT POINT(40.7128, -74.0060) as new_york + +-- Tokyo, Japan +SELECT POINT(35.6762, 139.6503) as tokyo + +-- Sydney, Australia +SELECT POINT(-33.8688, 151.2093) as sydney +``` + +**3. Use in distance calculation:** +```sql +-- Distance between Paris and London +SELECT + ST_DISTANCE(POINT(48.8566, 2.3522), POINT(51.5074, -0.1278)) AS distance +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), POINT(51.5074, -0.1278)) < 400 km +-- Result: precomputed distance, approximately 343 km +``` + +**4. Use in WHERE clause:** +```sql +SELECT * +FROM stores +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) < 5 km +``` + +--- + +### Practical Geo-Spatial Examples + +**1. 
Store Locator (within 5km):** +```sql +SELECT + name, + address, + phone, + ST_DISTANCE(POINT(48.8566, 2.3522), location) AS distance +FROM stores +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) <= 5 km + AND open_now = true +ORDER BY distance ASC +LIMIT 10 +``` + +**2. Restaurant Finder (within 1 mile):** +```sql +SELECT + name, + cuisine, + rating, + ST_DISTANCE(POINT(40.7128, -74.0060), location) AS distance +FROM restaurants +WHERE ST_DISTANCE(POINT(40.7128, -74.0060), location) <= 1 mi + AND rating >= 4.0 +ORDER BY rating DESC, distance ASC +LIMIT 20 +``` + +**3. Delivery Zone Analysis:** +```sql +SELECT + order_id, + customer_name, + ST_DISTANCE(warehouse_location, delivery_location) AS distance, + CASE + WHEN ST_DISTANCE(warehouse_location, delivery_location) <= 5 km THEN 'Local - Free Delivery' + WHEN ST_DISTANCE(warehouse_location, delivery_location) <= 20 km THEN 'Regional - 5€' + WHEN ST_DISTANCE(warehouse_location, delivery_location) <= 50 km THEN 'Extended - 15€' + ELSE 'Long Distance - 30€' + END AS delivery_zone +FROM orders +WHERE order_date >= CURRENT_DATE + AND ST_DISTANCE(warehouse_location, delivery_location) < 100 km +ORDER BY distance ASC +``` + +**4. Nearest Airport Search:** +```sql +SELECT + airport_code, + airport_name, + city, + country, + ST_DISTANCE(POINT(48.8566, 2.3522), location) AS distance +FROM airports +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) < 100 km +ORDER BY distance ASC +LIMIT 5 +``` + +**5. Maritime Route Planning:** +```sql +SELECT + route_id, + origin_port, + destination_port, + ST_DISTANCE(origin_location, destination_location) AS distance_nmi +FROM shipping_routes +WHERE ST_DISTANCE(origin_location, destination_location) BETWEEN 50 nmi AND 200 nmi + AND active = true +ORDER BY distance_nmi ASC +``` + +**6. Coverage Analysis by Store:** +```sql +SELECT + s.store_id, + s.name as store_name, + s.city, + COUNT(DISTINCT c.customer_id) as customers_within_5km, + COUNT(DISTINCT CASE + WHEN ST_DISTANCE(s.location, c.location) <= 2 km + THEN c.customer_id + END) as customers_within_2km +FROM stores s +JOIN customers c ON ST_DISTANCE(s.location, c.location) <= 5 km +WHERE s.active = true +GROUP BY s.store_id, s.name, s.city +ORDER BY customers_within_5km DESC +``` + +**7. Real-time Geofencing:** +```sql +SELECT + device_id, + user_name, + timestamp, + ST_DISTANCE(POINT(48.8566, 2.3522), current_location) AS distance, + CASE + WHEN ST_DISTANCE(POINT(48.8566, 2.3522), current_location) <= 50 m THEN 'Inside' + WHEN ST_DISTANCE(POINT(48.8566, 2.3522), current_location) <= 100 m THEN 'Near' + ELSE 'Outside' + END AS status +FROM device_locations +WHERE timestamp >= NOW() - INTERVAL 15 MINUTE + AND ST_DISTANCE(POINT(48.8566, 2.3522), current_location) < 500 m +ORDER BY timestamp DESC +``` + +**8. Multi-Distance Route Filtering:** +```sql +SELECT + route_id, + origin_city, + destination_city, + ST_DISTANCE(origin_location, destination_location) AS route_distance, + ST_DISTANCE(POINT(48.8566, 2.3522), origin_location) AS origin_from_paris, + ST_DISTANCE(POINT(48.8566, 2.3522), destination_location) AS dest_from_paris +FROM routes +WHERE ST_DISTANCE(origin_location, destination_location) BETWEEN 100 km AND 500 km + AND (ST_DISTANCE(POINT(48.8566, 2.3522), origin_location) < 50 km + OR ST_DISTANCE(POINT(48.8566, 2.3522), destination_location) < 50 km) +ORDER BY route_distance ASC +``` + +**9. 
Distance-Based Pricing:** +```sql +SELECT + order_id, + customer_name, + ST_DISTANCE(warehouse_location, delivery_location) AS distance, + CASE + WHEN ST_DISTANCE(warehouse_location, delivery_location) <= 10 km THEN 5.00 + WHEN ST_DISTANCE(warehouse_location, delivery_location) <= 25 km THEN 10.00 + WHEN ST_DISTANCE(warehouse_location, delivery_location) <= 50 km THEN 20.00 + ELSE 35.00 + END AS shipping_cost +FROM orders +WHERE order_date >= CURRENT_DATE + AND ST_DISTANCE(warehouse_location, delivery_location) < 100 km +ORDER BY distance ASC +``` + +**10. Proximity Search with Multiple Criteria:** +```sql +SELECT + hotel_id, + name, + rating, + price_per_night, + ST_DISTANCE(POINT(48.8566, 2.3522), location) AS distance_from_center, + ST_DISTANCE(airport_location, location) AS distance_from_airport +FROM hotels +WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) < 5 km + AND ST_DISTANCE(airport_location, location) < 30 km + AND rating >= 4.0 + AND price_per_night <= 200 +ORDER BY rating DESC, distance_from_center ASC +LIMIT 10 +``` + +--- + +### Common Distance Thresholds by Unit + +```sql +-- Kilometers +WHERE ST_DISTANCE(point1, point2) < 1 km -- 1 kilometer +WHERE ST_DISTANCE(point1, point2) <= 5 km -- 5 kilometers +WHERE ST_DISTANCE(point1, point2) BETWEEN 10 km AND 50 km + +-- Miles +WHERE ST_DISTANCE(point1, point2) < 1 mi -- 1 mile +WHERE ST_DISTANCE(point1, point2) <= 5 mi -- 5 miles +WHERE ST_DISTANCE(point1, point2) BETWEEN 10 mi AND 50 mi + +-- Meters +WHERE ST_DISTANCE(point1, point2) < 100 m -- 100 meters +WHERE ST_DISTANCE(point1, point2) <= 500 m -- 500 meters +WHERE ST_DISTANCE(point1, point2) BETWEEN 1000 m AND 5000 m + +-- Feet +WHERE ST_DISTANCE(point1, point2) < 100 ft -- 100 feet +WHERE ST_DISTANCE(point1, point2) <= 500 ft -- 500 feet +WHERE ST_DISTANCE(point1, point2) BETWEEN 1000 ft AND 5000 ft + +-- Nautical Miles +WHERE ST_DISTANCE(point1, point2) < 10 nmi -- 10 nautical miles +WHERE ST_DISTANCE(point1, point2) <= 50 nmi -- 50 nautical miles +WHERE ST_DISTANCE(point1, point2) BETWEEN 100 nmi AND 500 nmi + +-- Yards +WHERE ST_DISTANCE(point1, point2) < 100 yd -- 100 yards +WHERE ST_DISTANCE(point1, point2) <= 500 yd -- 500 yards + +-- Centimeters +WHERE ST_DISTANCE(point1, point2) < 100 cm -- 100 centimeters +WHERE ST_DISTANCE(point1, point2) <= 500 cm -- 500 centimeters + +-- Millimeters +WHERE ST_DISTANCE(point1, point2) < 1000 mm -- 1000 millimeters +WHERE ST_DISTANCE(point1, point2) <= 5000 mm -- 5000 millimeters + +-- Inches +WHERE ST_DISTANCE(point1, point2) < 100 in -- 100 inches +WHERE ST_DISTANCE(point1, point2) <= 500 in -- 500 inches +``` + +--- + +### Unit Selection Guidelines + +**Choose the appropriate unit based on your use case:** + +| Use Case | Recommended Unit | Example Query | +|--------------------|-------------------|-----------------------------------| +| City/Urban search | `km` or `m` | `WHERE ST_DISTANCE(...) < 5 km` | +| Country/Regional | `km` or `mi` | `WHERE ST_DISTANCE(...) < 100 km` | +| Maritime/Aviation | `nmi` | `WHERE ST_DISTANCE(...) < 50 nmi` | +| Building/Indoor | `m` or `ft` | `WHERE ST_DISTANCE(...) < 100 m` | +| Precision tracking | `m` or `cm` | `WHERE ST_DISTANCE(...) < 50 m` | +| US-based apps | `mi` or `ft` | `WHERE ST_DISTANCE(...) < 10 mi` | +| International apps | `km` or `m` | `WHERE ST_DISTANCE(...) 
< 10 km` |
+
+---
+
+### Quick Reference
+
+The API supports comprehensive geo-spatial queries with distance calculations:
+
+```sql
+-- Distance in kilometers
+SELECT
+  name,
+  ST_DISTANCE(POINT(48.8566, 2.3522), location) AS distance
+FROM stores
+WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) < 5 km
+ORDER BY distance ASC
+
+-- Distance in miles
+SELECT
+  name,
+  ST_DISTANCE(POINT(40.7128, -74.0060), location) AS distance
+FROM restaurants
+WHERE ST_DISTANCE(POINT(40.7128, -74.0060), location) <= 2 mi
+ORDER BY distance ASC
+
+-- Distance range with BETWEEN
+SELECT
+  name,
+  ST_DISTANCE(POINT(48.8566, 2.3522), location) AS distance
+FROM stores
+WHERE ST_DISTANCE(POINT(48.8566, 2.3522), location) BETWEEN 4 km AND 10 km
+
+-- Multiple distance comparisons
+SELECT
+  ST_DISTANCE(POINT(-70.0, 40.0), toLocation) AS d1,
+  ST_DISTANCE(fromLocation, POINT(-70.0, 40.0)) AS d2
+FROM routes
+WHERE ST_DISTANCE(POINT(-70.0, 40.0), toLocation) BETWEEN 4000 km AND 5000 km
+  AND ST_DISTANCE(fromLocation, toLocation) < 2000 km
+
+-- Distance in nautical miles
+SELECT
+  ship_name,
+  DISTANCE(port_location, ship_location) AS distance
+FROM vessels
+WHERE DISTANCE(port_location, ship_location) < 50 nmi
+
+-- Distance in meters for precision
+SELECT
+  device_id,
+  ST_DISTANCE(POINT(48.8566, 2.3522), current_location) AS distance
+FROM devices
+WHERE ST_DISTANCE(POINT(48.8566, 2.3522), current_location) < 100 m
+```
+
+**Supported Distance Units:**
+- **Metric**: `km`, `m`, `cm`, `mm`
+- **Imperial**: `mi`, `yd`, `ft`, `in`
+- **Nautical**: `nmi`
+
+**Syntax**: Specify the unit directly after the numeric value in comparisons.
+
+[Back to index](README.md)
diff --git a/documentation/sql/functions_math.md b/documentation/sql/functions_math.md
new file mode 100644
index 00000000..0181f9eb
--- /dev/null
+++ b/documentation/sql/functions_math.md
@@ -0,0 +1,1073 @@
+[Back to index](README.md)
+
+## Mathematical Functions
+
+**Navigation:** [Aggregate Functions](functions_aggregate.md) · [String Functions](functions_string.md)
+
+---
+
+### Basic Arithmetic Functions
+
+#### ABS
+
+Absolute value.
+
+**Syntax:**
+```sql
+ABS(x)
+```
+
+**Inputs:**
+- `x` - `NUMERIC`
+
+**Output:**
+- `NUMERIC`
+
+**Examples:**
+```sql
+-- Absolute value of negative number
+SELECT ABS(-5) AS a;
+-- Result: 5
+
+-- Absolute value of positive number
+SELECT ABS(5) AS a;
+-- Result: 5
+
+-- Absolute value of zero
+SELECT ABS(0) AS a;
+-- Result: 0
+
+-- With decimals
+SELECT ABS(-123.456) AS a;
+-- Result: 123.456
+
+-- In WHERE clause
+SELECT * FROM transactions
+WHERE ABS(amount) > 1000;
+
+-- Calculate absolute difference
+SELECT
+  order_id,
+  estimated_price,
+  actual_price,
+  ABS(estimated_price - actual_price) AS price_difference
+FROM orders;
+```
+
+---
+
+#### SIGN / SGN
+
+Returns -1, 0, or 1 according to the sign of `x`.
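+
+Conceptually, `SIGN` is equivalent to the `CASE` sketch below (for illustration only):
+
+```sql
+-- Sketch: SIGN(x) behaves like
+CASE WHEN x > 0 THEN 1 WHEN x < 0 THEN -1 ELSE 0 END
+```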
+ +**Syntax:** +```sql +SIGN(x) +SGN(x) +``` + +**Inputs:** +- `x` - `NUMERIC` + +**Output:** +- `TINYINT` (-1, 0, or 1) + +**Examples:** +```sql +-- Sign of negative number +SELECT SIGN(-10) AS s; +-- Result: -1 + +-- Sign of positive number +SELECT SIGN(10) AS s; +-- Result: 1 + +-- Sign of zero +SELECT SIGN(0) AS s; +-- Result: 0 + +-- Using SGN alias +SELECT SGN(-5.5) AS s; +-- Result: -1 + +-- Classify values +SELECT + transaction_id, + amount, + CASE SIGN(amount) + WHEN 1 THEN 'Credit' + WHEN -1 THEN 'Debit' + ELSE 'Zero' + END AS transaction_type +FROM transactions; + +-- Count positive and negative values +SELECT + COUNT(CASE WHEN SIGN(balance) = 1 THEN 1 END) AS positive_count, + COUNT(CASE WHEN SIGN(balance) = -1 THEN 1 END) AS negative_count, + COUNT(CASE WHEN SIGN(balance) = 0 THEN 1 END) AS zero_count +FROM accounts; +``` + +--- + +### Rounding Functions + +#### ROUND + +Round to n decimals (optional). + +**Syntax:** +```sql +ROUND(x) +ROUND(x, n) +``` + +**Inputs:** +- `x` - `NUMERIC` +- `n` (optional) - `INT` (number of decimal places, default: 0) + +**Output:** +- `DOUBLE` + +**Examples:** +```sql +-- Round to 2 decimals +SELECT ROUND(123.456, 2) AS r; +-- Result: 123.46 + +-- Round to nearest integer +SELECT ROUND(123.456) AS r; +-- Result: 123 + +-- Round to 1 decimal +SELECT ROUND(123.456, 1) AS r; +-- Result: 123.5 + +-- Round negative number +SELECT ROUND(-123.456, 2) AS r; +-- Result: -123.46 + +-- Round to tens place (negative decimals) +SELECT ROUND(123.456, -1) AS r; +-- Result: 120 + +-- Round prices +SELECT + product_id, + price, + ROUND(price * 1.2, 2) AS price_with_tax +FROM products; + +-- Round averages +SELECT + category, + ROUND(AVG(price), 2) AS avg_price +FROM products +GROUP BY category; +``` + +--- + +#### FLOOR + +Greatest `BIGINT` ≤ x (round down). + +**Syntax:** +```sql +FLOOR(x) +``` + +**Inputs:** +- `x` - `NUMERIC` + +**Output:** +- `BIGINT` + +**Examples:** +```sql +-- Floor of positive number +SELECT FLOOR(3.9) AS f; +-- Result: 3 + +-- Floor of negative number +SELECT FLOOR(-3.1) AS f; +-- Result: -4 + +-- Floor of integer +SELECT FLOOR(5) AS f; +-- Result: 5 + +-- Floor with decimals +SELECT FLOOR(123.999) AS f; +-- Result: 123 + +-- Calculate age in complete years +SELECT + user_id, + name, + FLOOR(DATEDIFF(CURRENT_DATE, birth_date, DAY) / 365.25) AS age +FROM users; + +-- Bucket values +SELECT + FLOOR(price / 10) * 10 AS price_bucket, + COUNT(*) AS count +FROM products +GROUP BY price_bucket +ORDER BY price_bucket; +``` + +--- + +#### CEIL / CEILING + +Smallest `BIGINT` ≥ x (round up). + +**Syntax:** +```sql +CEIL(x) +CEILING(x) +``` + +**Inputs:** +- `x` - `NUMERIC` + +**Output:** +- `BIGINT` + +**Examples:** +```sql +-- Ceiling of positive number +SELECT CEIL(3.1) AS c; +-- Result: 4 + +-- Ceiling of negative number +SELECT CEIL(-3.9) AS c; +-- Result: -3 + +-- Using CEILING alias +SELECT CEILING(2.001) AS c; +-- Result: 3 + +-- Ceiling of integer +SELECT CEIL(5) AS c; +-- Result: 5 + +-- Calculate required packages +SELECT + order_id, + total_items, + CEIL(total_items / 10.0) AS packages_needed +FROM orders; + +-- Round up prices +SELECT + product_id, + CEIL(price) AS rounded_price +FROM products; +``` + +--- + +### Power and Root Functions + +#### POWER / POW + +x raised to the power of y (x^y). 
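+
+Fractional exponents compute roots, which is handy where no dedicated root function exists:
+
+```sql
+-- Cube root via a fractional exponent
+SELECT POWER(27, 1.0 / 3) AS cube_root;
+-- Result: 3 (approximately, due to floating-point arithmetic)
+```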
+ +**Syntax:** +```sql +POWER(x, y) +POW(x, y) +``` + +**Inputs:** +- `x` - `NUMERIC` (base) +- `y` - `NUMERIC` (exponent) + +**Output:** +- `NUMERIC` + +**Examples:** +```sql +-- 2 to the power of 10 +SELECT POWER(2, 10) AS p; +-- Result: 1024 + +-- Using POW alias +SELECT POW(3, 4) AS p; +-- Result: 81 + +-- Square +SELECT POWER(5, 2) AS square; +-- Result: 25 + +-- Cube +SELECT POWER(3, 3) AS cube; +-- Result: 27 + +-- Fractional exponent (root) +SELECT POWER(16, 0.5) AS p; +-- Result: 4 (square root) + +-- Negative exponent +SELECT POWER(2, -3) AS p; +-- Result: 0.125 (1/8) + +-- Calculate compound interest +SELECT + initial_amount, + ROUND(initial_amount * POWER(1 + interest_rate, years), 2) AS final_amount +FROM investments; + +-- Exponential growth +SELECT + day, + POWER(1.05, day) AS growth_factor +FROM generate_series(0, 10) AS day; +``` + +--- + +#### SQRT + +Square root. + +**Syntax:** +```sql +SQRT(x) +``` + +**Inputs:** +- `x` - `NUMERIC` (≥ 0) + +**Output:** +- `NUMERIC` + +**Examples:** +```sql +-- Square root of 16 +SELECT SQRT(16) AS s; +-- Result: 4 + +-- Square root of 2 +SELECT SQRT(2) AS s; +-- Result: 1.414213562... + +-- Square root of 0 +SELECT SQRT(0) AS s; +-- Result: 0 + +-- Calculate standard deviation component +SELECT + category, + SQRT(AVG(POWER(price - avg_price, 2))) AS std_dev +FROM products +CROSS JOIN ( + SELECT AVG(price) AS avg_price FROM products +) AS avg_calc +GROUP BY category; + +-- Euclidean distance (2D) +SELECT + SQRT(POWER(x2 - x1, 2) + POWER(y2 - y1, 2)) AS distance +FROM coordinates; + +-- Calculate hypotenuse +SELECT + side_a, + side_b, + SQRT(POWER(side_a, 2) + POWER(side_b, 2)) AS hypotenuse +FROM triangles; +``` + +--- + +### Logarithmic and Exponential Functions + +#### LOG / LN + +Natural logarithm (base e). + +**Syntax:** +```sql +LOG(x) +LN(x) +``` + +**Inputs:** +- `x` - `NUMERIC` (> 0) + +**Output:** +- `NUMERIC` + +**Examples:** +```sql +-- Natural log of e +SELECT LOG(EXP(1)) AS l; +-- Result: 1 + +-- Using LN alias +SELECT LN(10) AS l; +-- Result: 2.302585... + +-- Natural log of 1 +SELECT LOG(1) AS l; +-- Result: 0 + +-- Calculate log returns +SELECT + date, + price, + LOG(price / LAG(price) OVER (ORDER BY date)) AS log_return +FROM stock_prices; + +-- Logarithmic scale +SELECT + value, + LOG(value) AS log_value +FROM measurements +WHERE value > 0; +``` + +--- + +#### LOG10 + +Base-10 logarithm. + +**Syntax:** +```sql +LOG10(x) +``` + +**Inputs:** +- `x` - `NUMERIC` (> 0) + +**Output:** +- `NUMERIC` + +**Examples:** +```sql +-- Log base 10 of 1000 +SELECT LOG10(1000) AS l10; +-- Result: 3 + +-- Log base 10 of 100 +SELECT LOG10(100) AS l10; +-- Result: 2 + +-- Log base 10 of 10 +SELECT LOG10(10) AS l10; +-- Result: 1 + +-- Log base 10 of 1 +SELECT LOG10(1) AS l10; +-- Result: 0 + +-- Calculate order of magnitude +SELECT + value, + FLOOR(LOG10(ABS(value))) AS magnitude +FROM measurements +WHERE value != 0; + +-- Logarithmic binning +SELECT + FLOOR(LOG10(population)) AS log_bucket, + COUNT(*) AS city_count +FROM cities +WHERE population > 0 +GROUP BY log_bucket +ORDER BY log_bucket; +``` + +--- + +#### EXP + +Exponential function (e^x). + +**Syntax:** +```sql +EXP(x) +``` + +**Inputs:** +- `x` - `NUMERIC` + +**Output:** +- `NUMERIC` + +**Examples:** +```sql +-- e to the power of 1 +SELECT EXP(1) AS e; +-- Result: 2.718281828... + +-- e to the power of 0 +SELECT EXP(0) AS e; +-- Result: 1 + +-- e to the power of 2 +SELECT EXP(2) AS e; +-- Result: 7.389056... + +-- Negative exponent +SELECT EXP(-1) AS e; +-- Result: 0.367879... 
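+
+-- Large exponent (values grow quickly; illustrative)
+SELECT EXP(10) AS e;
+-- Result: 22026.465... (e^10)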
+
+-- Calculate exponential growth
+SELECT
+  time_period,
+  initial_value * EXP(growth_rate * time_period) AS projected_value
+FROM projections;
+
+-- Inverse of natural log
+SELECT
+  x,
+  LOG(x) AS log_x,
+  EXP(LOG(x)) AS back_to_x
+FROM values_table;
+```
+
+---
+
+### Trigonometric Functions
+
+All trigonometric functions use **radians** as input/output units.
+
+#### COS
+
+Cosine function.
+
+**Syntax:**
+```sql
+COS(x)
+```
+
+**Inputs:**
+- `x` - `DOUBLE` (angle in radians)
+
+**Output:**
+- `DOUBLE`
+
+**Examples:**
+```sql
+-- Cosine of π/3 (60 degrees)
+SELECT COS(PI() / 3) AS c;
+-- Result: 0.5
+
+-- Cosine of 0
+SELECT COS(0) AS c;
+-- Result: 1
+
+-- Cosine of π/2 (90 degrees)
+SELECT COS(PI() / 2) AS c;
+-- Result: 0 (approximately)
+
+-- Cosine of π (180 degrees)
+SELECT COS(PI()) AS c;
+-- Result: -1
+
+-- Convert degrees to radians and calculate
+SELECT COS(RADIANS(60)) AS c;
+-- Result: 0.5
+```
+
+---
+
+#### ACOS
+
+Arc cosine (inverse cosine).
+
+**Syntax:**
+```sql
+ACOS(x)
+```
+
+**Inputs:**
+- `x` - `DOUBLE` (value between -1 and 1)
+
+**Output:**
+- `DOUBLE` (angle in radians, range [0, π])
+
+**Examples:**
+```sql
+-- Arc cosine of 0.5
+SELECT ACOS(0.5) AS ac;
+-- Result: 1.047197... (π/3 or 60 degrees)
+
+-- Arc cosine of 1
+SELECT ACOS(1) AS ac;
+-- Result: 0
+
+-- Arc cosine of -1
+SELECT ACOS(-1) AS ac;
+-- Result: 3.141592... (π or 180 degrees)
+
+-- Arc cosine of 0
+SELECT ACOS(0) AS ac;
+-- Result: 1.570796... (π/2 or 90 degrees)
+
+-- Convert result to degrees
+SELECT DEGREES(ACOS(0.5)) AS angle_degrees;
+-- Result: 60
+```
+
+---
+
+#### SIN
+
+Sine function.
+
+**Syntax:**
+```sql
+SIN(x)
+```
+
+**Inputs:**
+- `x` - `DOUBLE` (angle in radians)
+
+**Output:**
+- `DOUBLE`
+
+**Examples:**
+```sql
+-- Sine of π/6 (30 degrees)
+SELECT SIN(PI() / 6) AS s;
+-- Result: 0.5
+
+-- Sine of 0
+SELECT SIN(0) AS s;
+-- Result: 0
+
+-- Sine of π/2 (90 degrees)
+SELECT SIN(PI() / 2) AS s;
+-- Result: 1
+
+-- Sine of π (180 degrees)
+SELECT SIN(PI()) AS s;
+-- Result: 0 (approximately)
+
+-- Convert degrees to radians
+SELECT SIN(RADIANS(30)) AS s;
+-- Result: 0.5
+```
+
+---
+
+#### ASIN
+
+Arc sine (inverse sine).
+
+**Syntax:**
+```sql
+ASIN(x)
+```
+
+**Inputs:**
+- `x` - `DOUBLE` (value between -1 and 1)
+
+**Output:**
+- `DOUBLE` (angle in radians, range [-π/2, π/2])
+
+**Examples:**
+```sql
+-- Arc sine of 0.5
+SELECT ASIN(0.5) AS a;
+-- Result: 0.523598... (π/6 or 30 degrees)
+
+-- Arc sine of 1
+SELECT ASIN(1) AS a;
+-- Result: 1.570796... (π/2 or 90 degrees)
+
+-- Arc sine of -1
+SELECT ASIN(-1) AS a;
+-- Result: -1.570796... (-π/2 or -90 degrees)
+
+-- Arc sine of 0
+SELECT ASIN(0) AS a;
+-- Result: 0
+
+-- Convert to degrees
+SELECT DEGREES(ASIN(0.5)) AS angle_degrees;
+-- Result: 30
+```
+
+---
+
+#### TAN
+
+Tangent function.
+
+**Syntax:**
+```sql
+TAN(x)
+```
+
+**Inputs:**
+- `x` - `DOUBLE` (angle in radians)
+
+**Output:**
+- `DOUBLE`
+
+**Examples:**
+```sql
+-- Tangent of π/4 (45 degrees)
+SELECT TAN(PI() / 4) AS t;
+-- Result: 1
+
+-- Tangent of 0
+SELECT TAN(0) AS t;
+-- Result: 0
+
+-- Tangent of π/6 (30 degrees)
+SELECT TAN(PI() / 6) AS t;
+-- Result: 0.577350... (1/√3)
+
+-- Convert degrees to radians
+SELECT TAN(RADIANS(45)) AS t;
+-- Result: 1
+
+-- Calculate slope
+SELECT
+  rise,
+  run,
+  TAN(ATAN(rise / run)) AS slope_verified
+FROM slopes;
+```
+
+---
+
+#### ATAN
+
+Arc tangent (inverse tangent).
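+
+Note that `ATAN(y / x)` alone cannot distinguish opposite quadrants (e.g. the points `(1, 1)` and `(-1, -1)` yield the same ratio); use the two-argument `ATAN2` (below) when the quadrant matters:
+
+```sql
+-- Same ratio, same ATAN result, despite opposite quadrants
+SELECT ATAN(1 / 1) AS q1, ATAN(-1 / -1) AS q3;
+-- Result: 0.785398... (π/4) for both
+```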
+
+**Syntax:**
+```sql
+ATAN(x)
+```
+
+**Inputs:**
+- `x` - `DOUBLE`
+
+**Output:**
+- `DOUBLE` (angle in radians, range [-π/2, π/2])
+
+**Examples:**
+```sql
+-- Arc tangent of 1
+SELECT ATAN(1) AS at;
+-- Result: 0.785398... (π/4 or 45 degrees)
+
+-- Arc tangent of 0
+SELECT ATAN(0) AS at;
+-- Result: 0
+
+-- Arc tangent of √3
+SELECT ATAN(SQRT(3)) AS at;
+-- Result: 1.047197... (π/3 or 60 degrees)
+
+-- Convert to degrees
+SELECT DEGREES(ATAN(1)) AS angle_degrees;
+-- Result: 45
+
+-- Calculate angle from slope
+SELECT
+  rise,
+  run,
+  DEGREES(ATAN(rise / run)) AS angle_degrees
+FROM slopes;
+```
+
+---
+
+#### ATAN2
+
+Two-argument arc tangent (returns angle in correct quadrant).
+
+**Syntax:**
+```sql
+ATAN2(y, x)
+```
+
+**Inputs:**
+- `y` - `DOUBLE` (y-coordinate)
+- `x` - `DOUBLE` (x-coordinate)
+
+**Output:**
+- `DOUBLE` (angle in radians, range [-π, π])
+
+**Examples:**
+```sql
+-- Angle to point (x=1, y=1) - 45 degrees
+SELECT ATAN2(1, 1) AS angle;
+-- Result: 0.785398... (π/4)
+
+-- Angle to point (x=0, y=1) - 90 degrees
+SELECT ATAN2(1, 0) AS angle;
+-- Result: 1.570796... (π/2)
+
+-- Angle to point (x=1, y=0) - 0 degrees
+SELECT ATAN2(0, 1) AS angle;
+-- Result: 0
+
+-- Angle to point (x=-1, y=-1) - 225 degrees (3rd quadrant)
+SELECT ATAN2(-1, -1) AS angle;
+-- Result: -2.356194... (-3π/4)
+
+-- Convert to degrees
+SELECT DEGREES(ATAN2(1, 1)) AS angle_degrees;
+-- Result: 45
+
+-- Calculate bearing/azimuth
+SELECT
+  point_id,
+  DEGREES(ATAN2(delta_y, delta_x)) AS bearing_degrees
+FROM coordinates;
+
+-- Calculate angle between two points
+SELECT
+  DEGREES(ATAN2(y2 - y1, x2 - x1)) AS angle
+FROM point_pairs;
+```
+
+---
+
+### Helper Functions for Trigonometry
+
+#### PI
+
+Returns the value of π (pi).
+
+**Syntax:**
+```sql
+PI()
+```
+
+**Output:**
+- `DOUBLE` (3.141592653589793)
+
+**Examples:**
+```sql
+-- Value of π
+SELECT PI() AS pi;
+-- Result: 3.141592653589793
+
+-- Calculate circle area
+SELECT
+  radius,
+  PI() * POWER(radius, 2) AS area
+FROM circles;
+
+-- Calculate circle circumference
+SELECT
+  radius,
+  2 * PI() * radius AS circumference
+FROM circles;
+
+-- Convert degrees to radians
+SELECT
+  degrees,
+  degrees * PI() / 180 AS radians
+FROM angles;
+```
+
+---
+
+#### RADIANS
+
+Convert degrees to radians.
+
+**Syntax:**
+```sql
+RADIANS(degrees)
+```
+
+**Inputs:**
+- `degrees` - `DOUBLE`
+
+**Output:**
+- `DOUBLE` (radians)
+
+**Examples:**
+```sql
+-- Convert 180 degrees to radians
+SELECT RADIANS(180) AS rad;
+-- Result: 3.141592... (π)
+
+-- Convert 90 degrees to radians
+SELECT RADIANS(90) AS rad;
+-- Result: 1.570796... (π/2)
+
+-- Convert 45 degrees to radians
+SELECT RADIANS(45) AS rad;
+-- Result: 0.785398... (π/4)
+
+-- Use in trigonometric functions
+SELECT SIN(RADIANS(30)) AS sine_30_degrees;
+-- Result: 0.5
+```
+
+---
+
+#### DEGREES
+
+Convert radians to degrees.
+
+**Syntax:**
+```sql
+DEGREES(radians)
+```
+
+**Inputs:**
+- `radians` - `DOUBLE`
+
+**Output:**
+- `DOUBLE` (degrees)
+
+**Examples:**
+```sql
+-- Convert π radians to degrees
+SELECT DEGREES(PI()) AS deg;
+-- Result: 180
+
+-- Convert π/2 radians to degrees
+SELECT DEGREES(PI() / 2) AS deg;
+-- Result: 90
+
+-- Convert π/4 radians to degrees
+SELECT DEGREES(PI() / 4) AS deg;
+-- Result: 45
+
+-- Convert result of ACOS to degrees
+SELECT DEGREES(ACOS(0.5)) AS angle_degrees;
+-- Result: 60
+```
+
+---
+
+### Practical Mathematical Examples
+
+**1. 
Calculate Distance Using Pythagorean Theorem:** +```sql +SELECT + point_id, + SQRT(POWER(x2 - x1, 2) + POWER(y2 - y1, 2)) AS distance +FROM coordinates; +``` + +**2. Compound Interest Calculation:** +```sql +SELECT + account_id, + principal, + rate, + years, + ROUND(principal * POWER(1 + rate, years), 2) AS final_amount +FROM investments; +``` + +**3. Standard Deviation:** +```sql +SELECT + category, + ROUND(SQRT(AVG(POWER(value - avg_value, 2))), 2) AS std_dev +FROM ( + SELECT + category, + value, + AVG(value) OVER (PARTITION BY category) AS avg_value + FROM measurements +) AS calc +GROUP BY category; +``` + +**4. Normalize Values (0-1 range):** +```sql +SELECT + id, + value, + ROUND( + (value - MIN(value) OVER ()) / + (MAX(value) OVER () - MIN(value) OVER ()), + 4 + ) AS normalized_value +FROM data_table; +``` + +**5. Calculate Angle Between Vectors:** +```sql +SELECT + vector_id, + DEGREES(ATAN2(y, x)) AS angle_degrees, + SQRT(POWER(x, 2) + POWER(y, 2)) AS magnitude +FROM vectors; +``` + +**6. Exponential Moving Average:** +```sql +SELECT + date, + value, + ROUND( + value * EXP(-1 * days_diff / smoothing_factor), + 2 + ) AS ema_weight +FROM time_series; +``` + +**7. Price Buckets (Logarithmic):** +```sql +SELECT + POWER(10, FLOOR(LOG10(price))) AS price_bucket_start, + POWER(10, CEIL(LOG10(price))) AS price_bucket_end, + COUNT(*) AS product_count +FROM products +WHERE price > 0 +GROUP BY FLOOR(LOG10(price)), CEIL(LOG10(price)) +ORDER BY price_bucket_start; +``` + +**8. Calculate Circle Properties:** +```sql +SELECT + circle_id, + radius, + ROUND(2 * PI() * radius, 2) AS circumference, + ROUND(PI() * POWER(radius, 2), 2) AS area +FROM circles; +``` + +--- + +### Mathematical Function Summary + +| Function | Description | Example | +|---------------|---------------------|------------------------------| +| `ABS(x)` | Absolute value | `ABS(-5)` → 5 | +| `SIGN(x)` | Sign (-1, 0, 1) | `SIGN(-10)` → -1 | +| `ROUND(x, n)` | Round to n decimals | `ROUND(123.456, 2)` → 123.46 | +| `FLOOR(x)` | Round down | `FLOOR(3.9)` → 3 | +| `CEIL(x)` | Round up | `CEIL(3.1)` → 4 | +| `POWER(x, y)` | x^y | `POWER(2, 10)` → 1024 | +| `SQRT(x)` | Square root | `SQRT(16)` → 4 | +| `LOG(x)` | Natural log | `LOG(EXP(1))` → 1 | +| `LOG10(x)` | Base-10 log | `LOG10(1000)` → 3 | +| `EXP(x)` | e^x | `EXP(1)` → 2.718... | +| `COS(x)` | Cosine | `COS(PI()/3)` → 0.5 | +| `ACOS(x)` | Arc cosine | `ACOS(0.5)` → π/3 | +| `SIN(x)` | Sine | `SIN(PI()/6)` → 0.5 | +| `ASIN(x)` | Arc sine | `ASIN(0.5)` → π/6 | +| `TAN(x)` | Tangent | `TAN(PI()/4)` → 1 | +| `ATAN(x)` | Arc tangent | `ATAN(1)` → π/4 | +| `ATAN2(y, x)` | Two-arg arc tangent | `ATAN2(1, 1)` → π/4 | +| `PI()` | Value of π | `PI()` → 3.14159... | +| `RADIANS(x)` | Degrees to radians | `RADIANS(180)` → π | +| `DEGREES(x)` | Radians to degrees | `DEGREES(PI())` → 180 | + +--- + +[Back to index](README.md) diff --git a/documentation/sql/functions_string.md b/documentation/sql/functions_string.md new file mode 100644 index 00000000..22b85a98 --- /dev/null +++ b/documentation/sql/functions_string.md @@ -0,0 +1,1267 @@ +[Back to index](README.md) + +## String Functions + +--- + +### Case Conversion Functions + +#### UPPER / UCASE + +Convert string to upper case. 
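+
+In Elasticsearch, analyzed `text` fields are often lowercased at index time, so explicit case normalization mainly matters when comparing against `keyword` (exact-value) fields; a sketch (the `users` index and `country_code` field are hypothetical):
+
+```sql
+-- Case-insensitive match on a keyword field (hypothetical index/field)
+SELECT * FROM users
+WHERE UPPER(country_code) = 'FR';
+```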
+ +**Syntax:** +```sql +UPPER(str) +UCASE(str) +``` + +**Inputs:** +- `str` - `VARCHAR` + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Basic uppercase conversion +SELECT UPPER('hello') AS up; +-- Result: 'HELLO' + +-- Using UCASE alias +SELECT UCASE('world') AS up; +-- Result: 'WORLD' + +-- Mixed case +SELECT UPPER('Hello World') AS up; +-- Result: 'HELLO WORLD' + +-- With special characters +SELECT UPPER('café') AS up; +-- Result: 'CAFÉ' + +-- Normalize user input +SELECT + user_id, + UPPER(email) AS normalized_email +FROM users; + +-- Case-insensitive comparison +SELECT * FROM products +WHERE UPPER(name) = UPPER('iPhone'); + +-- Uppercase in concatenation +SELECT CONCAT(UPPER(first_name), ' ', UPPER(last_name)) AS full_name +FROM employees; +``` + +--- + +#### LOWER / LCASE + +Convert string to lower case. + +**Syntax:** +```sql +LOWER(str) +LCASE(str) +``` + +**Inputs:** +- `str` - `VARCHAR` + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Basic lowercase conversion +SELECT LOWER('Hello') AS lo; +-- Result: 'hello' + +-- Using LCASE alias +SELECT LCASE('WORLD') AS lo; +-- Result: 'world' + +-- Mixed case +SELECT LOWER('Hello World') AS lo; +-- Result: 'hello world' + +-- With special characters +SELECT LOWER('CAFÉ') AS lo; +-- Result: 'café' + +-- Normalize email addresses +SELECT + user_id, + LOWER(email) AS email_lower +FROM users; + +-- Case-insensitive search +SELECT * FROM articles +WHERE LOWER(title) LIKE '%elasticsearch%'; + +-- Lowercase tags +SELECT + article_id, + LOWER(tag) AS normalized_tag +FROM article_tags; +``` + +--- + +### Trimming Functions + +#### TRIM + +Trim whitespace from both sides. + +**Syntax:** +```sql +TRIM(str) +``` + +**Inputs:** +- `str` - `VARCHAR` + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Basic trim +SELECT TRIM(' abc ') AS t; +-- Result: 'abc' + +-- Trim tabs and spaces +SELECT TRIM(' abc ') AS t; +-- Result: 'abc' + +-- No whitespace +SELECT TRIM('abc') AS t; +-- Result: 'abc' + +-- Clean user input +SELECT + user_id, + TRIM(username) AS clean_username +FROM users; + +-- Trim in WHERE clause +SELECT * FROM products +WHERE TRIM(name) = 'iPhone'; + +-- Trim before comparison +SELECT + order_id, + TRIM(status) AS status +FROM orders +WHERE TRIM(status) IN ('pending', 'shipped'); +``` + +--- + +#### LTRIM + +Trim whitespace from left side. + +**Syntax:** +```sql +LTRIM(str) +``` + +**Inputs:** +- `str` - `VARCHAR` + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Left trim +SELECT LTRIM(' abc ') AS t; +-- Result: 'abc ' + +-- Remove leading spaces +SELECT LTRIM(' hello') AS t; +-- Result: 'hello' + +-- No leading whitespace +SELECT LTRIM('abc ') AS t; +-- Result: 'abc ' + +-- Clean prefixes +SELECT + product_id, + LTRIM(code) AS trimmed_code +FROM products; +``` + +--- + +#### RTRIM + +Trim whitespace from right side. + +**Syntax:** +```sql +RTRIM(str) +``` + +**Inputs:** +- `str` - `VARCHAR` + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Right trim +SELECT RTRIM(' abc ') AS t; +-- Result: ' abc' + +-- Remove trailing spaces +SELECT RTRIM('hello ') AS t; +-- Result: 'hello' + +-- No trailing whitespace +SELECT RTRIM(' abc') AS t; +-- Result: ' abc' + +-- Clean suffixes +SELECT + product_id, + RTRIM(description) AS trimmed_desc +FROM products; +``` + +--- + +### String Measurement Functions + +#### LENGTH / LEN + +Character length of string. 
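+
+Since `LENGTH` counts every character, including surrounding whitespace, a common validation pattern pairs it with `TRIM` (both documented on this page):
+```sql
+-- Length after stripping surrounding whitespace
+SELECT LENGTH(TRIM('  abc  ')) AS effective_length;
+-- Result: 3
+```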
+ +**Syntax:** +```sql +LENGTH(str) +LEN(str) +``` + +**Inputs:** +- `str` - `VARCHAR` + +**Output:** +- `BIGINT` + +**Examples:** +```sql +-- Basic length +SELECT LENGTH('abc') AS l; +-- Result: 3 + +-- Using LEN alias +SELECT LEN('hello') AS l; +-- Result: 5 + +-- Empty string +SELECT LENGTH('') AS l; +-- Result: 0 + +-- With spaces +SELECT LENGTH('hello world') AS l; +-- Result: 11 + +-- Unicode characters +SELECT LENGTH('café') AS l; +-- Result: 4 + +-- Filter by length +SELECT * FROM products +WHERE LENGTH(name) > 20; + +-- Validate input length +SELECT + user_id, + username, + LENGTH(username) AS username_length +FROM users +WHERE LENGTH(username) < 3; + +-- Average length +SELECT + category, + ROUND(AVG(LENGTH(description)), 2) AS avg_desc_length +FROM products +GROUP BY category; +``` + +--- + +### String Extraction Functions + +#### SUBSTRING / SUBSTR + +SQL 1-based substring extraction. + +**Syntax:** +```sql +SUBSTRING(str, start) +SUBSTRING(str, start, length) +SUBSTRING(str FROM start) +SUBSTRING(str FROM start FOR length) +SUBSTR(str, start, length) +``` + +**Inputs:** +- `str` - `VARCHAR` +- `start` - `INT` (≥ 1, 1-based index) +- `length` (optional) - `INT` + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Extract with start and length +SELECT SUBSTRING('abcdef', 2, 3) AS s; +-- Result: 'bcd' + +-- Using FROM...FOR syntax +SELECT SUBSTRING('abcdef' FROM 2 FOR 3) AS s; +-- Result: 'bcd' + +-- Extract from position to end +SELECT SUBSTRING('abcdef' FROM 4) AS s; +-- Result: 'def' + +-- Using SUBSTR alias +SELECT SUBSTR('hello world', 7, 5) AS s; +-- Result: 'world' + +-- Extract first character +SELECT SUBSTRING('hello', 1, 1) AS s; +-- Result: 'h' + +-- Extract year from date string +SELECT SUBSTRING('2025-01-10', 1, 4) AS year; +-- Result: '2025' + +-- Extract domain from email +SELECT + email, + SUBSTRING(email FROM POSITION('@' IN email) + 1) AS domain +FROM users; + +-- Extract area code from phone +SELECT + phone, + SUBSTRING(phone, 1, 3) AS area_code +FROM contacts; + +-- Extract product code +SELECT + product_id, + SUBSTRING(product_id FROM 1 FOR 3) AS category_code +FROM products; +``` + +--- + +#### LEFT + +Returns the leftmost characters from a string. + +**Syntax:** +```sql +LEFT(str, length) +LEFT(str FOR length) +``` + +**Inputs:** +- `str` - `VARCHAR` +- `length` - `INT` + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Extract first 3 characters +SELECT LEFT('abcdef', 3) AS l; +-- Result: 'abc' + +-- Using FOR syntax +SELECT LEFT('abcdef' FOR 3) AS l; +-- Result: 'abc' + +-- Extract first character +SELECT LEFT('hello', 1) AS l; +-- Result: 'h' + +-- Length exceeds string +SELECT LEFT('abc', 10) AS l; +-- Result: 'abc' + +-- Extract prefix +SELECT + product_code, + LEFT(product_code, 2) AS category +FROM products; + +-- Extract initials +SELECT + first_name, + LEFT(first_name, 1) AS initial +FROM users; + +-- First word approximation +SELECT + title, + LEFT(title, POSITION(' ' IN title) - 1) AS first_word +FROM articles; +``` + +--- + +#### RIGHT + +Returns the rightmost characters from a string. 
+ +**Syntax:** +```sql +RIGHT(str, length) +RIGHT(str FOR length) +``` + +**Inputs:** +- `str` - `VARCHAR` +- `length` - `INT` (must be ≥ 0) + +**Output:** +- `VARCHAR` + +**Notes:** +- If `length` exceeds string size, returns the full string +- If `length = 0`, returns empty string +- If `length < 0`, raises validation error + +**Examples:** +```sql +-- Extract last 3 characters +SELECT RIGHT('abcdef', 3) AS r; +-- Result: 'def' + +-- Using FOR syntax +SELECT RIGHT('abcdef' FOR 3) AS r; +-- Result: 'def' + +-- Length exceeds string +SELECT RIGHT('abcdef' FOR 10) AS r; +-- Result: 'abcdef' + +-- Extract last character +SELECT RIGHT('hello', 1) AS r; +-- Result: 'o' + +-- Zero length +SELECT RIGHT('hello', 0) AS r; +-- Result: '' + +-- Extract file extension +SELECT + filename, + RIGHT(filename, LENGTH(filename) - POSITION('.' IN filename)) AS extension +FROM files; + +-- Extract last 4 digits +SELECT + credit_card, + RIGHT(credit_card, 4) AS last_four +FROM payments; + +-- Extract suffix +SELECT + product_code, + RIGHT(product_code, 3) AS variant +FROM products; +``` + +--- + +### String Manipulation Functions + +#### CONCAT + +Concatenate values into a string. + +**Syntax:** +```sql +CONCAT(expr1, expr2, ...) +``` + +**Inputs:** +- `expr1, expr2, ...` - Values coercible to `VARCHAR` + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Basic concatenation +SELECT CONCAT('Hello', ' ', 'World') AS greeting; +-- Result: 'Hello World' + +-- Concatenate names +SELECT CONCAT(firstName, ' ', lastName) AS full FROM users; + +-- Multiple values +SELECT CONCAT('Product: ', name, ' - Price: ', price) AS info +FROM products; + +-- With NULL handling +SELECT CONCAT('Hello', NULL, 'World') AS result; +-- Result: NULL (any NULL makes entire result NULL) + +-- Build full address +SELECT + CONCAT(street, ', ', city, ', ', state, ' ', zip) AS full_address +FROM addresses; + +-- Create email +SELECT + CONCAT(LOWER(first_name), '.', LOWER(last_name), '@company.com') AS email +FROM employees; + +-- Build URL +SELECT + CONCAT('https://example.com/products/', product_id) AS url +FROM products; + +-- Format currency +SELECT + product_id, + CONCAT('$', ROUND(price, 2)) AS formatted_price +FROM products; +``` + +--- + +#### REPLACE + +Replaces all occurrences of a substring with another substring. + +**Syntax:** +```sql +REPLACE(str, search, replace) +``` + +**Inputs:** +- `str` - `VARCHAR` (source string) +- `search` - `VARCHAR` (substring to find) +- `replace` - `VARCHAR` (replacement substring) + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Remove prefix +SELECT REPLACE('Mr. John', 'Mr. ', '') AS r; +-- Result: 'John' + +-- Replace word +SELECT REPLACE('Hello World', 'World', 'Universe') AS r; +-- Result: 'Hello Universe' + +-- Remove all spaces +SELECT REPLACE('a b c d', ' ', '') AS r; +-- Result: 'abcd' + +-- Replace multiple occurrences +SELECT REPLACE('aaa', 'a', 'b') AS r; +-- Result: 'bbb' + +-- Clean phone numbers +SELECT + phone, + REPLACE(REPLACE(REPLACE(phone, '-', ''), '(', ''), ')', '') AS clean_phone +FROM contacts; + +-- Normalize URLs +SELECT + url, + REPLACE(url, 'http://', 'https://') AS secure_url +FROM links; + +-- Remove special characters +SELECT + product_name, + REPLACE(REPLACE(product_name, '/', '-'), '&', 'and') AS clean_name +FROM products; + +-- Mask sensitive data +SELECT + email, + REPLACE(email, SUBSTRING(email FROM 2 FOR 3), '***') AS masked_email +FROM users; +``` + +--- + +#### REVERSE + +Reverses the characters in a string. 
+ +**Syntax:** +```sql +REVERSE(str) +``` + +**Inputs:** +- `str` - `VARCHAR` + +**Output:** +- `VARCHAR` + +**Examples:** +```sql +-- Basic reverse +SELECT REVERSE('abcdef') AS r; +-- Result: 'fedcba' + +-- Reverse word +SELECT REVERSE('hello') AS r; +-- Result: 'olleh' + +-- Palindrome check +SELECT + word, + word = REVERSE(word) AS is_palindrome +FROM words; + +-- Reverse for encoding +SELECT + text, + REVERSE(text) AS reversed +FROM messages; +``` + +--- + +### String Search Functions + +#### POSITION / STRPOS + +Returns the 1-based position of the first occurrence of a substring in a string. + +**Syntax:** +```sql +POSITION(substr, str) +POSITION(substr IN str) +POSITION(substr IN str FROM start) +STRPOS(str, substr) +``` + +**Inputs:** +- `substr` - `VARCHAR` (substring to find) +- `str` - `VARCHAR` (string to search in) +- `start` (optional) - `INT` (1-based starting position) + +**Output:** +- `BIGINT` (returns 0 if not found) + +**Examples:** +```sql +-- Find substring position +SELECT POSITION('lo', 'hello') AS pos; +-- Result: 4 + +-- Using IN syntax +SELECT POSITION('lo' IN 'hello') AS pos; +-- Result: 4 + +-- Start search from position +SELECT POSITION('a' IN 'Elasticsearch' FROM 5) AS pos; +-- Result: 10 + +-- Not found +SELECT POSITION('z' IN 'Elasticsearch') AS pos; +-- Result: 0 + +-- Using STRPOS alias +SELECT STRPOS('hello world', 'world') AS pos; +-- Result: 7 + +-- Find @ in email +SELECT + email, + POSITION('@' IN email) AS at_position +FROM users; + +-- Check if substring exists +SELECT * FROM products +WHERE POSITION('pro' IN LOWER(name)) > 0; + +-- Extract domain from email +SELECT + email, + SUBSTRING(email FROM POSITION('@' IN email) + 1) AS domain +FROM users; + +-- Find first space +SELECT + full_name, + POSITION(' ' IN full_name) AS space_pos +FROM contacts; + +-- Multiple searches +SELECT + text, + POSITION('error' IN LOWER(text)) AS error_pos, + POSITION('warning' IN LOWER(text)) AS warning_pos +FROM logs; +``` + +--- + +### Regular Expression Functions + +#### REGEXP_LIKE / REGEXP + +Returns `TRUE` if the input string matches the regular expression pattern. 
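+
+Judging from the examples below, a pattern matches when it occurs anywhere in the input; anchor it with `^` and `$` to require a full-string match (a sketch assuming standard, unanchored regex semantics):
+```sql
+-- Unanchored pattern matches the substring 'HEL'
+SELECT REGEXP_LIKE('Hello', 'HEL', 'i');
+-- Result: true
+
+-- Anchored pattern must match the entire string
+SELECT REGEXP_LIKE('Hello', '^HEL$', 'i');
+-- Result: false
+```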
+
+**Syntax:**
+```sql
+REGEXP_LIKE(string, pattern)
+REGEXP_LIKE(string, pattern, match_param)
+REGEXP(string, pattern)
+REGEXP(string, pattern, match_param)
+```
+
+**Inputs:**
+- `string` - `VARCHAR` (input string to test)
+- `pattern` - `VARCHAR` (regular expression pattern)
+- `match_param` (optional) - `VARCHAR` (matching behavior control)
+  - `'i'` - Case-insensitive match
+  - `'c'` - Case-sensitive match (default)
+  - `'m'` - Multi-line mode
+  - `'n'` - Allows `.` to match newline characters
+
+**Output:**
+- `BOOLEAN`
+
+**Examples:**
+```sql
+-- Case-sensitive match (default)
+SELECT REGEXP_LIKE('Hello', 'HEL');
+-- Result: false
+
+-- Case-insensitive match
+SELECT REGEXP_LIKE('Hello', 'HEL', 'i');
+-- Result: true
+
+-- Multi-line mode
+SELECT REGEXP_LIKE('abc\nxyz', '^xyz', 'm');
+-- Result: true
+
+-- Using REGEXP alias (accepts the same optional match_param)
+SELECT REGEXP('test@example.com', '^[a-z]+@[a-z]+\\.com$', 'i');
+-- Result: true
+
+-- Match email pattern
+SELECT * FROM users
+WHERE REGEXP_LIKE(email, '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z]{2,}$', 'i');
+
+-- Match phone pattern
+SELECT * FROM contacts
+WHERE REGEXP_LIKE(phone, '^\\d{3}-\\d{3}-\\d{4}$');
+
+-- Match alphanumeric
+SELECT * FROM products
+WHERE REGEXP_LIKE(sku, '^[A-Z]{3}[0-9]{4}$');
+
+-- Match URL pattern
+SELECT * FROM links
+WHERE REGEXP_LIKE(url, '^https?://[a-z0-9.-]+\\.[a-z]{2,}', 'i');
+
+-- Find specific words
+SELECT * FROM articles
+WHERE REGEXP_LIKE(content, '\\b(elasticsearch|kibana|logstash)\\b', 'i');
+
+-- Validate format
+SELECT
+  customer_id,
+  REGEXP_LIKE(zip_code, '^\\d{5}(-\\d{4})?$') AS valid_zip
+FROM customers;
+
+-- Pattern matching with alternation
+SELECT * FROM logs
+WHERE REGEXP_LIKE(message, 'error|warning|critical', 'i');
+
+-- Match beginning and end
+SELECT * FROM products
+WHERE REGEXP_LIKE(name, '^iPhone.*Pro$', 'i');
+```
+
+---
+
+### Full-Text Search Function
+
+#### MATCH ... AGAINST
+
+Performs full-text search using Elasticsearch's match query capabilities. Follows MySQL-style syntax.
+
+**Syntax:**
+```sql
+MATCH(field1, field2, ...) 
AGAINST (query_text) +``` + +**Inputs:** +- `field1, field2, ...` - Column names to search in (one or more fields) +- `query_text` - `VARCHAR` (text to search for) + +**Output:** +- `BOOLEAN` (when used in WHERE clause) +- `DOUBLE` (relevance score when used in SELECT) + +**Examples:** + +**Basic Single Field Search:** +```sql +-- Search in one field +SELECT * FROM articles +WHERE MATCH(content) AGAINST ('elasticsearch'); + +-- Search in title +SELECT * FROM articles +WHERE MATCH(title) AGAINST ('tutorial'); +``` + +**Multiple Field Search:** +```sql +-- Search in multiple fields +SELECT * FROM articles +WHERE MATCH(title, content) AGAINST ('elasticsearch tutorial'); + +-- Search across three fields +SELECT * FROM products +WHERE MATCH(name, description, tags) AGAINST ('wireless headphones'); + +-- Search in all text fields +SELECT * FROM documents +WHERE MATCH(title, abstract, content, keywords) AGAINST ('machine learning'); +``` + +**With Relevance Score:** +```sql +-- Get relevance score in results +SELECT + title, + author, + MATCH(title, content) AGAINST ('elasticsearch') AS relevance_score +FROM articles +WHERE MATCH(title, content) AGAINST ('elasticsearch') +ORDER BY relevance_score DESC; + +-- Score from multiple fields +SELECT + product_id, + name, + MATCH(name, description) AGAINST ('laptop') AS score +FROM products +WHERE MATCH(name, description) AGAINST ('laptop') +ORDER BY score DESC +LIMIT 20; +``` + +**Multi-Term Search:** +```sql +-- Search for multiple terms (any can match) +SELECT * FROM articles +WHERE MATCH(content) AGAINST ('elasticsearch kibana logstash'); + +-- Multiple terms across multiple fields +SELECT * FROM products +WHERE MATCH(name, description) AGAINST ('wireless bluetooth speaker'); + +-- Natural language query +SELECT * FROM documents +WHERE MATCH(content) AGAINST ('how to use elasticsearch for search'); +``` + +**Practical Examples:** + +**1. E-commerce Product Search:** +```sql +-- Search products across multiple fields +SELECT + product_id, + name, + price, + MATCH(name, description, category) AGAINST ('wireless headphones') AS relevance +FROM products +WHERE MATCH(name, description, category) AGAINST ('wireless headphones') + AND price BETWEEN 50 AND 200 + AND in_stock = true +ORDER BY relevance DESC +LIMIT 20; +``` + +**2. Blog Article Search:** +```sql +-- Search articles with scoring +SELECT + article_id, + title, + author, + published_date, + MATCH(title, content, tags) AGAINST ('elasticsearch tutorial') AS score +FROM articles +WHERE MATCH(title, content, tags) AGAINST ('elasticsearch tutorial') + AND status = 'published' + AND published_date >= DATE_SUB(CURRENT_DATE, INTERVAL 1 YEAR) +ORDER BY score DESC, published_date DESC +LIMIT 10; +``` + +**3. Documentation Search:** +```sql +-- Search documentation +SELECT + doc_id, + title, + section, + MATCH(title, content) AGAINST ('query syntax') AS relevance +FROM documentation +WHERE MATCH(title, content) AGAINST ('query syntax') +ORDER BY relevance DESC; +``` + +**4. User Search:** +```sql +-- Search users by name, email, bio +SELECT + user_id, + username, + email, + MATCH(username, email, bio) AGAINST ('john developer') AS score +FROM users +WHERE MATCH(username, email, bio) AGAINST ('john developer') +ORDER BY score DESC +LIMIT 10; +``` + +**5. 
Multi-Field with Weighted Scoring:** +```sql +-- Boost title matches more than content matches +SELECT + article_id, + title, + (MATCH(title) AGAINST ('elasticsearch') * 3 + + MATCH(content) AGAINST ('elasticsearch') * 1) AS weighted_score +FROM articles +WHERE MATCH(title, content) AGAINST ('elasticsearch') +ORDER BY weighted_score DESC; + +-- Different weights for different fields +SELECT + product_id, + name, + (MATCH(name) AGAINST ('laptop gaming') * 5 + + MATCH(description) AGAINST ('laptop gaming') * 2 + + MATCH(tags) AGAINST ('laptop gaming') * 1) AS total_score +FROM products +WHERE MATCH(name, description, tags) AGAINST ('laptop gaming') +ORDER BY total_score DESC; +``` + +**6. Search with Filters:** +```sql +-- Full-text search combined with exact filters +SELECT * FROM products +WHERE MATCH(name, description) AGAINST ('laptop gaming') + AND category = 'electronics' + AND price <= 2000 + AND rating >= 4.0 + AND in_stock = true +ORDER BY MATCH(name, description) AGAINST ('laptop gaming') DESC; +``` + +**7. Search Across All Text Fields:** +```sql +-- Comprehensive search +SELECT + id, + title, + MATCH(title, subtitle, description, content, tags, author, category) + AGAINST ('data science python') AS score +FROM articles +WHERE MATCH(title, subtitle, description, content, tags, author, category) + AGAINST ('data science python') + AND published = true +ORDER BY score DESC +LIMIT 50; +``` + +**8. Search with Pagination:** +```sql +-- Paginated search results +SELECT + product_id, + name, + description, + MATCH(name, description) AGAINST ('smartphone') AS relevance +FROM products +WHERE MATCH(name, description) AGAINST ('smartphone') +ORDER BY relevance DESC +LIMIT 20 OFFSET 0; -- First page + +-- Second page +SELECT + product_id, + name, + description, + MATCH(name, description) AGAINST ('smartphone') AS relevance +FROM products +WHERE MATCH(name, description) AGAINST ('smartphone') +ORDER BY relevance DESC +LIMIT 20 OFFSET 20; -- Second page +``` + +**9. Search with Multiple Conditions:** +```sql +-- Combine multiple searches +SELECT * FROM articles +WHERE (MATCH(title) AGAINST ('elasticsearch') + OR MATCH(content) AGAINST ('elasticsearch')) + AND author = 'John Doe' + AND published_date >= '2024-01-01'; + +-- Search in different field combinations +SELECT + doc_id, + title, + MATCH(title, tags) AGAINST ('tutorial') AS title_tag_score, + MATCH(content) AGAINST ('tutorial') AS content_score +FROM documentation +WHERE MATCH(title, tags, content) AGAINST ('tutorial') +ORDER BY (title_tag_score * 2 + content_score) DESC; +``` + +**10. Search with Aggregations:** +```sql +-- Count results by category +SELECT + category, + COUNT(*) AS result_count +FROM products +WHERE MATCH(name, description) AGAINST ('wireless') +GROUP BY category +ORDER BY result_count DESC; + +-- Average relevance score by category +SELECT + category, + AVG(MATCH(name, description) AGAINST ('laptop')) AS avg_relevance, + COUNT(*) AS product_count +FROM products +WHERE MATCH(name, description) AGAINST ('laptop') +GROUP BY category +ORDER BY avg_relevance DESC; +``` + +**11. Search with Date Ranges:** +```sql +-- Recent articles matching search +SELECT + article_id, + title, + published_date, + MATCH(title, content) AGAINST ('machine learning') AS score +FROM articles +WHERE MATCH(title, content) AGAINST ('machine learning') + AND published_date >= DATE_SUB(CURRENT_DATE, INTERVAL 3 MONTH) +ORDER BY score DESC, published_date DESC; +``` + +**12. 
Case-Insensitive Search (Automatic):** +```sql +-- Full-text search is automatically case-insensitive +SELECT * FROM products +WHERE MATCH(name) AGAINST ('iPhone'); -- Matches 'iphone', 'IPHONE', 'iPhone' + +SELECT * FROM articles +WHERE MATCH(title) AGAINST ('ELASTICSEARCH'); -- Matches 'elasticsearch', 'Elasticsearch', etc. +``` + +**13. Phrase Search:** +```sql +-- Search for phrases +SELECT * FROM articles +WHERE MATCH(content) AGAINST ('machine learning algorithms'); + +-- Multi-word product search +SELECT * FROM products +WHERE MATCH(name, description) AGAINST ('wireless bluetooth headphones'); +``` + +**14. Search with NULL Handling:** +```sql +-- MATCH handles NULL fields gracefully +SELECT + product_id, + name, + description, + MATCH(name, description, notes) AGAINST ('laptop') AS score +FROM products +WHERE MATCH(name, description, notes) AGAINST ('laptop') +ORDER BY score DESC; +-- Works even if 'notes' is NULL for some records +``` + +**Performance Tips:** + +```sql +-- Good: Use MATCH on full-text indexed fields +SELECT * FROM articles +WHERE MATCH(title, content) AGAINST ('search query') + AND category = 'technology' -- Combine with exact filters + AND published = true; + +-- Good: Limit results for better performance +SELECT * FROM products +WHERE MATCH(name, description) AGAINST ('laptop') +ORDER BY MATCH(name, description) AGAINST ('laptop') DESC +LIMIT 100; + +-- Good: Use specific fields when possible +SELECT * FROM articles +WHERE MATCH(title) AGAINST ('elasticsearch') -- Search only in title +ORDER BY MATCH(title) AGAINST ('elasticsearch') DESC; + +-- Avoid: Don't use MATCH on non-text fields +-- Bad: WHERE MATCH(product_id) AGAINST ('12345') +-- Good: WHERE product_id = 12345 + +-- Avoid: Too many fields can impact performance +-- Consider limiting to most relevant fields +-- Bad: MATCH(field1, field2, ..., field20) AGAINST ('query') +-- Good: MATCH(title, content, tags) AGAINST ('query') +``` + +**Comparison with LIKE:** + +```sql +-- LIKE (slower, no relevance scoring, exact substring) +SELECT * FROM articles +WHERE title LIKE '%elasticsearch%' + OR content LIKE '%elasticsearch%'; + +-- MATCH AGAINST (faster, relevance scoring, full-text) +SELECT * FROM articles +WHERE MATCH(title, content) AGAINST ('elasticsearch') +ORDER BY MATCH(title, content) AGAINST ('elasticsearch') DESC; +``` + +**Common Use Cases:** + +```sql +-- 1. Simple keyword search +SELECT * FROM products +WHERE MATCH(name) AGAINST ('laptop'); + +-- 2. Multi-field search +SELECT * FROM articles +WHERE MATCH(title, content, tags) AGAINST ('elasticsearch tutorial'); + +-- 3. Ranked results +SELECT + title, + MATCH(title, content) AGAINST ('data science') AS relevance +FROM articles +WHERE MATCH(title, content) AGAINST ('data science') +ORDER BY relevance DESC +LIMIT 10; + +-- 4. Search with filters +SELECT * FROM products +WHERE MATCH(name, description) AGAINST ('smartphone') + AND price BETWEEN 200 AND 800 + AND brand = 'Samsung'; + +-- 5. 
Weighted multi-field search +SELECT + id, + title, + (MATCH(title) AGAINST ('python') * 3 + + MATCH(content) AGAINST ('python')) AS score +FROM articles +WHERE MATCH(title, content) AGAINST ('python') +ORDER BY score DESC; +``` + +--- + +### MATCH AGAINST vs Other String Functions + +| Feature | MATCH AGAINST | LIKE | REGEXP_LIKE | +|---------------------------|------------------|------------|--------------| +| Full-text search | ✅ Yes | ❌ No | ❌ No | +| Relevance scoring | ✅ Yes | ❌ No | ❌ No | +| Multiple fields | ✅ Yes | ❌ No | ❌ No | +| Stemming/Lemmatization | ✅ Yes | ❌ No | ❌ No | +| Performance on large text | ✅ Fast | ❌ Slow | ❌ Slow | +| Exact substring | ⚠️ Use for words | ✅ Yes | ✅ Yes | +| Case sensitivity | ❌ Insensitive | ⚠️ Depends | ⚠️ Optional | +| Natural language | ✅ Yes | ❌ No | ❌ No | + +**When to use MATCH AGAINST:** +- Full-text search across documents +- Need relevance scoring +- Search multiple fields simultaneously +- Natural language queries +- Large text content +- Need stemming/language analysis +- Search engine-like functionality + +**When to use LIKE:** +- Exact substring matching +- Simple pattern matching +- Single field search +- Short strings +- Exact character sequences + +**When to use REGEXP_LIKE:** +- Complex pattern matching +- Need regex capabilities +- Format validation +- Precise pattern requirements + +--- + +### Function Summary + +```sql +-- Basic syntax +MATCH(field) AGAINST ('query') + +-- Multiple fields +MATCH(field1, field2, field3) AGAINST ('query') + +-- With scoring +SELECT field, MATCH(field) AGAINST ('query') AS score +FROM table +WHERE MATCH(field) AGAINST ('query') +ORDER BY score DESC; + +-- Combined with filters +SELECT * FROM table +WHERE MATCH(field1, field2) AGAINST ('query') + AND other_field = 'value' +ORDER BY MATCH(field1, field2) AGAINST ('query') DESC; +``` + +--- + +[Back to index](README.md) diff --git a/documentation/functions_system.md b/documentation/sql/functions_system.md similarity index 80% rename from documentation/functions_system.md rename to documentation/sql/functions_system.md index 03fccc98..3a488921 100644 --- a/documentation/functions_system.md +++ b/documentation/sql/functions_system.md @@ -1,4 +1,4 @@ -[Back to index](./README.md) +[Back to index](README.md) # System Functions @@ -20,4 +20,4 @@ SELECT VERSION() AS v; -- Result: 'sql-elasticsearch-engine 1.0.0' ``` -[Back to index](./README.md) +[Back to index](README.md) diff --git a/documentation/sql/functions_type_conversion.md b/documentation/sql/functions_type_conversion.md new file mode 100644 index 00000000..602f21fe --- /dev/null +++ b/documentation/sql/functions_type_conversion.md @@ -0,0 +1,613 @@ +[Back to index](README.md) +## Type Conversion Functions + +--- + +### CAST / CONVERT + +Cast expression to a target SQL type. 
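+
+`NULL` input simply propagates (see the NULL handling row in the CAST vs TRY_CAST comparison table below):
+```sql
+-- NULL casts to NULL for any target type
+SELECT CAST(NULL AS INT) AS i;
+-- Result: NULL
+```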
+
+**Syntax:**
+```sql
+CAST(expr AS TYPE)
+CONVERT(expr, TYPE)
+```
+
+**Inputs:**
+- `expr` - Expression to convert
+- `TYPE` - Target data type:
+  - `VARCHAR` / `STRING` / `TEXT`
+  - `INT` / `INTEGER` / `BIGINT` / `SMALLINT` / `TINYINT`
+  - `DOUBLE` / `FLOAT` / `REAL`
+  - `DECIMAL(p, s)` / `NUMERIC(p, s)`
+  - `BOOLEAN` / `BOOL`
+  - `DATE`
+  - `TIMESTAMP` / `DATETIME`
+  - `TIME`
+
+**Output:**
+- Value converted to target `TYPE`
+
+**Behavior:**
+- Throws error if conversion fails
+- Use `TRY_CAST` for safe conversion
+
+**Examples:**
+
+**Numeric Conversions:**
+```sql
+-- Convert to DOUBLE
+SELECT CAST(salary AS DOUBLE) AS s FROM emp;
+-- Result: 12345.0
+
+-- Integer to DOUBLE
+SELECT CAST(100 AS DOUBLE) AS d;
+-- Result: 100.0
+
+-- String to INT
+SELECT CAST('123' AS INT) AS i;
+-- Result: 123
+
+-- String to DOUBLE
+SELECT CAST('123.45' AS DOUBLE) AS d;
+-- Result: 123.45
+
+-- DOUBLE to INT (truncates)
+SELECT CAST(123.99 AS INT) AS i;
+-- Result: 123
+
+-- Using CONVERT alias
+SELECT CONVERT(salary, DOUBLE) AS s FROM emp;
+-- Result: 12345.0
+```
+
+**String Conversions:**
+```sql
+-- Number to VARCHAR
+SELECT CAST(12345 AS VARCHAR) AS str;
+-- Result: '12345'
+
+-- DOUBLE to VARCHAR
+SELECT CAST(123.45 AS VARCHAR) AS str;
+-- Result: '123.45'
+
+-- Boolean to VARCHAR
+SELECT CAST(true AS VARCHAR) AS str;
+-- Result: 'true'
+
+-- Date to VARCHAR
+SELECT CAST(CURRENT_DATE AS VARCHAR) AS date_str;
+-- Result: '2025-10-27'
+
+-- Timestamp to VARCHAR
+SELECT CAST(CURRENT_TIMESTAMP AS VARCHAR) AS ts_str;
+-- Result: '2025-10-27 16:10:21'
+```
+
+**Date and Time Conversions:**
+```sql
+-- String to DATE
+SELECT CAST('2025-01-10' AS DATE) AS d;
+-- Result: 2025-01-10
+
+-- String to TIMESTAMP
+SELECT CAST('2025-01-10 14:30:00' AS TIMESTAMP) AS ts;
+-- Result: 2025-01-10 14:30:00
+
+-- Timestamp to DATE
+SELECT CAST(CURRENT_TIMESTAMP AS DATE) AS d;
+-- Result: 2025-10-27
+
+-- String with format to DATE
+SELECT CAST('2025/01/10' AS DATE) AS d;
+-- Result: 2025-01-10
+
+-- Unix timestamp (seconds) to TIMESTAMP
+SELECT CAST(1704902400 AS TIMESTAMP) AS ts;
+-- Result: 2024-01-10 16:00:00 (UTC)
+```
+
+**Boolean Conversions:**
+```sql
+-- Number to BOOLEAN
+SELECT CAST(1 AS BOOLEAN) AS b;
+-- Result: true
+
+SELECT CAST(0 AS BOOLEAN) AS b;
+-- Result: false
+
+-- String to BOOLEAN
+SELECT CAST('true' AS BOOLEAN) AS b;
+-- Result: true
+
+SELECT CAST('false' AS BOOLEAN) AS b;
+-- Result: false
+
+-- Boolean to INT
+SELECT CAST(true AS INT) AS i;
+-- Result: 1
+
+SELECT CAST(false AS INT) AS i;
+-- Result: 0
+```
+
+**Decimal/Numeric Conversions:**
+```sql
+-- To DECIMAL with precision
+SELECT CAST(123.456 AS DECIMAL(10, 2)) AS dec;
+-- Result: 123.46
+
+-- String to DECIMAL
+SELECT CAST('123.456' AS DECIMAL(10, 3)) AS dec;
+-- Result: 123.456
+
+-- INT to DECIMAL
+SELECT CAST(100 AS DECIMAL(10, 2)) AS dec;
+-- Result: 100.00
+```
+
+**Practical Examples:**
+
+**1. Normalize data types in queries:**
+```sql
+-- Ensure consistent numeric types
+SELECT
+  product_id,
+  CAST(price AS DECIMAL(10, 2)) AS price,
+  CAST(quantity AS INT) AS quantity
+FROM products;
+```
+
+**2. Convert for calculations:**
+```sql
+-- Avoid integer division
+SELECT
+  order_id,
+  total_items,
+  total_price,
+  CAST(total_price AS DOUBLE) / CAST(total_items AS DOUBLE) AS avg_price
+FROM orders;
+```
+
+**3. 
Format output:** +```sql +-- Convert numbers to strings for display +SELECT + product_id, + CONCAT('$', CAST(price AS VARCHAR)) AS formatted_price, + CONCAT(CAST(quantity AS VARCHAR), ' units') AS stock_info +FROM products; +``` + +**4. Date string parsing:** +```sql +-- Parse date strings from different formats +SELECT + order_id, + order_date_str, + CAST(order_date_str AS DATE) AS order_date +FROM orders +WHERE CAST(order_date_str AS DATE) >= '2025-01-01'; +``` + +**5. Type conversion in JOIN conditions:** +```sql +-- Convert types for joining +SELECT + o.order_id, + p.product_name +FROM orders o +JOIN products p ON CAST(o.product_id AS VARCHAR) = p.product_code; +``` + +**6. Boolean logic:** +```sql +-- Convert flags to boolean +SELECT + user_id, + username, + CAST(is_active AS BOOLEAN) AS active, + CAST(is_verified AS BOOLEAN) AS verified +FROM users +WHERE CAST(is_active AS BOOLEAN) = true; +``` + +**Error Cases (will throw errors):** +```sql +-- Invalid string to INT +SELECT CAST('not-a-number' AS INT); +-- ERROR: Cannot cast 'not-a-number' to INT + +-- Invalid date format +SELECT CAST('invalid-date' AS DATE); +-- ERROR: Cannot parse date + +-- Overflow +SELECT CAST(999999999999999999 AS TINYINT); +-- ERROR: Value out of range +``` + +--- + +### TRY_CAST / SAFE_CAST + +Attempt a cast and return NULL on failure (safer alternative). + +**Syntax:** +```sql +TRY_CAST(expr AS TYPE) +SAFE_CAST(expr AS TYPE) +``` + +**Inputs:** +- `expr` - Expression to convert +- `TYPE` - Target data type (same as CAST) + +**Output:** +- Value converted to target `TYPE`, or `NULL` if conversion fails + +**Behavior:** +- Returns `NULL` instead of throwing error on failure +- Useful for handling dirty/inconsistent data +- Safe for data validation and cleaning + +**Examples:** + +**Safe Numeric Conversions:** +```sql +-- Invalid string to INT returns NULL +SELECT TRY_CAST('not-a-number' AS INT) AS maybe_null; +-- Result: NULL + +-- Valid conversion works normally +SELECT TRY_CAST('123' AS INT) AS valid; +-- Result: 123 + +-- Using SAFE_CAST alias +SELECT SAFE_CAST('invalid' AS DOUBLE) AS result; +-- Result: NULL + +-- Mixed valid/invalid data +SELECT + value, + TRY_CAST(value AS INT) AS parsed_value +FROM ( + SELECT '123' AS value + UNION ALL SELECT 'abc' + UNION ALL SELECT '456' +) AS data; +-- Results: +-- '123' -> 123 +-- 'abc' -> NULL +-- '456' -> 456 +``` + +**Safe Date Conversions:** +```sql +-- Invalid date returns NULL +SELECT TRY_CAST('invalid-date' AS DATE) AS d; +-- Result: NULL + +-- Valid date works +SELECT TRY_CAST('2025-01-10' AS DATE) AS d; +-- Result: 2025-01-10 + +-- Handle multiple date formats +SELECT + date_str, + TRY_CAST(date_str AS DATE) AS parsed_date +FROM dates_table; +``` + +**Safe Boolean Conversions:** +```sql +-- Invalid boolean returns NULL +SELECT TRY_CAST('maybe' AS BOOLEAN) AS b; +-- Result: NULL + +-- Valid values work +SELECT TRY_CAST('true' AS BOOLEAN) AS b1, + TRY_CAST('false' AS BOOLEAN) AS b2, + TRY_CAST('1' AS BOOLEAN) AS b3; +-- Results: true, false, true +``` + +**Practical Examples:** + +**1. Data validation and cleaning:** +```sql +-- Identify and filter invalid data +SELECT + product_id, + price_str, + TRY_CAST(price_str AS DOUBLE) AS price, + CASE + WHEN TRY_CAST(price_str AS DOUBLE) IS NULL + THEN 'Invalid' + ELSE 'Valid' + END AS validation_status +FROM raw_products; +``` + +**2. 
Handle missing or corrupt data:** +```sql +-- Use COALESCE with TRY_CAST for defaults +SELECT + order_id, + COALESCE(TRY_CAST(quantity_str AS INT), 0) AS quantity, + COALESCE(TRY_CAST(price_str AS DOUBLE), 0.0) AS price +FROM orders_import; +``` + +**3. Filter out invalid records:** +```sql +-- Only process valid conversions +SELECT + user_id, + signup_date_str, + TRY_CAST(signup_date_str AS DATE) AS signup_date +FROM users_raw +WHERE TRY_CAST(signup_date_str AS DATE) IS NOT NULL; +``` + +**4. Data quality reporting:** +```sql +-- Count valid vs invalid conversions +SELECT + COUNT(*) AS total_records, + COUNT(TRY_CAST(price AS DOUBLE)) AS valid_prices, + COUNT(*) - COUNT(TRY_CAST(price AS DOUBLE)) AS invalid_prices, + ROUND( + COUNT(TRY_CAST(price AS DOUBLE)) * 100.0 / COUNT(*), + 2 + ) AS valid_percentage +FROM products_import; +``` + +**5. Safe type conversion in calculations:** +```sql +-- Avoid errors in calculations +SELECT + product_id, + name, + TRY_CAST(price AS DOUBLE) * TRY_CAST(quantity AS INT) AS total_value +FROM products +WHERE TRY_CAST(price AS DOUBLE) IS NOT NULL + AND TRY_CAST(quantity AS INT) IS NOT NULL; +``` + +**6. Multiple conversion attempts:** +```sql +-- Try multiple formats +SELECT + date_field, + COALESCE( + TRY_CAST(date_field AS DATE), + TRY_CAST(REPLACE(date_field, '/', '-') AS DATE), + TRY_CAST(CONCAT(date_field, '-01') AS DATE) + ) AS parsed_date +FROM date_strings; +``` + +**7. ETL data validation:** +```sql +-- Validate imported data before processing +SELECT + row_id, + customer_id, + order_date, + amount, + CASE + WHEN TRY_CAST(customer_id AS INT) IS NULL + THEN 'Invalid customer_id' + WHEN TRY_CAST(order_date AS DATE) IS NULL + THEN 'Invalid order_date' + WHEN TRY_CAST(amount AS DOUBLE) IS NULL + THEN 'Invalid amount' + ELSE 'Valid' + END AS validation_error +FROM staging_orders +WHERE TRY_CAST(customer_id AS INT) IS NULL + OR TRY_CAST(order_date AS DATE) IS NULL + OR TRY_CAST(amount AS DOUBLE) IS NULL; +``` + +**8. Safe aggregations:** +```sql +-- Aggregate only valid numeric values +SELECT + category, + COUNT(*) AS total_products, + COUNT(TRY_CAST(price AS DOUBLE)) AS products_with_valid_price, + AVG(TRY_CAST(price AS DOUBLE)) AS avg_price, + SUM(TRY_CAST(price AS DOUBLE)) AS total_value +FROM products_raw +GROUP BY category; +``` + +**9. Conditional conversion:** +```sql +-- Apply different conversions based on conditions +SELECT + field_name, + field_value, + CASE + WHEN field_name = 'age' + THEN TRY_CAST(field_value AS INT) + WHEN field_name = 'salary' + THEN TRY_CAST(field_value AS DOUBLE) + WHEN field_name = 'hire_date' + THEN TRY_CAST(field_value AS DATE) + ELSE field_value + END AS converted_value +FROM dynamic_fields; +``` + +**10. 
Data migration with validation:** +```sql +-- Migrate data with quality checks +INSERT INTO products_clean ( + product_id, + name, + price, + quantity, + created_date +) +SELECT + product_id, + name, + TRY_CAST(price_str AS DECIMAL(10, 2)) AS price, + TRY_CAST(quantity_str AS INT) AS quantity, + TRY_CAST(created_date_str AS DATE) AS created_date +FROM products_staging +WHERE TRY_CAST(price_str AS DECIMAL(10, 2)) IS NOT NULL + AND TRY_CAST(quantity_str AS INT) IS NOT NULL + AND TRY_CAST(created_date_str AS DATE) IS NOT NULL; +``` + +--- + +### CAST vs TRY_CAST Comparison + +| Feature | CAST | TRY_CAST | +|-----------------------|-----------------------|-----------------------| +| On conversion failure | Throws error | Returns NULL | +| Use case | Clean, validated data | Dirty, uncertain data | +| Performance | Slightly faster | Slightly slower | +| Data validation | Manual required | Built-in | +| Error handling | Must use try-catch | Automatic | +| NULL handling | Propagates NULL | Propagates NULL | + +**When to use CAST:** +- Data is already validated +- You want to catch errors explicitly +- Performance is critical +- Conversion should never fail + +**When to use TRY_CAST:** +- Working with user input +- Importing external data +- Data quality is uncertain +- Need graceful error handling +- ETL/data cleaning operations +- Want to filter invalid data + +--- + +### Common Type Conversion Patterns + +**1. String to Number (safe):** +```sql +SELECT + COALESCE(TRY_CAST(value AS INT), 0) AS safe_int, + COALESCE(TRY_CAST(value AS DOUBLE), 0.0) AS safe_double +FROM data_table; +``` + +**2. Number to String (formatted):** +```sql +SELECT + CONCAT('$', CAST(ROUND(price, 2) AS VARCHAR)) AS formatted_price +FROM products; +``` + +**3. Date String Parsing (flexible):** +```sql +SELECT + COALESCE( + TRY_CAST(date_str AS DATE), + TRY_CAST(REPLACE(date_str, '/', '-') AS DATE) + ) AS parsed_date +FROM dates; +``` + +**4. Boolean Flags:** +```sql +SELECT + CAST(CASE + WHEN status = 'active' THEN 1 + ELSE 0 + END AS BOOLEAN) AS is_active +FROM users; +``` + +**5. Type-safe Calculations:** +```sql +SELECT + CAST(numerator AS DOUBLE) / CAST(denominator AS DOUBLE) AS ratio +FROM calculations +WHERE denominator != 0; +``` + +--- + +### Type Conversion Summary Table + +| From Type | To Type | CAST Example | Notes | +|------------|-----------|-------------------------------|---------------------------------| +| VARCHAR | INT | `CAST('123' AS INT)` | Must be valid integer string | +| VARCHAR | DOUBLE | `CAST('123.45' AS DOUBLE)` | Must be valid number string | +| VARCHAR | DATE | `CAST('2025-01-10' AS DATE)` | Must be valid date format | +| VARCHAR | BOOLEAN | `CAST('true' AS BOOLEAN)` | Accepts 'true'/'false', '1'/'0' | +| INT | VARCHAR | `CAST(123 AS VARCHAR)` | Always succeeds | +| INT | DOUBLE | `CAST(123 AS DOUBLE)` | Always succeeds | +| INT | BOOLEAN | `CAST(1 AS BOOLEAN)` | 0=false, non-zero=true | +| DOUBLE | INT | `CAST(123.99 AS INT)` | Truncates decimal | +| DOUBLE | VARCHAR | `CAST(123.45 AS VARCHAR)` | Always succeeds | +| DATE | VARCHAR | `CAST(date_col AS VARCHAR)` | Format: 'YYYY-MM-DD' | +| DATE | TIMESTAMP | `CAST(date_col AS TIMESTAMP)` | Time set to 00:00:00 | +| TIMESTAMP | DATE | `CAST(ts_col AS DATE)` | Drops time component | +| TIMESTAMP | VARCHAR | `CAST(ts_col AS VARCHAR)` | Format: 'YYYY-MM-DD HH:MI:SS' | +| BOOLEAN | INT | `CAST(true AS INT)` | true=1, false=0 | +| BOOLEAN | VARCHAR | `CAST(true AS VARCHAR)` | 'true' or 'false' | + +--- + +### Best Practices + +**1. 
Use TRY_CAST for user input:** +```sql +-- Good +SELECT TRY_CAST(user_input AS INT) FROM form_data; + +-- Avoid +SELECT CAST(user_input AS INT) FROM form_data; -- May fail +``` + +**2. Validate before CAST:** +```sql +-- Good +SELECT CAST(price AS DOUBLE) +FROM products +WHERE price IS NOT NULL + AND price REGEXP '^[0-9]+\\.?[0-9]*$'; +``` + +**3. Provide defaults for failed conversions:** +```sql +-- Good +SELECT COALESCE(TRY_CAST(value AS INT), -1) AS safe_value +FROM data_table; +``` + +**4. Use appropriate precision for DECIMAL:** +```sql +-- Good +SELECT CAST(price AS DECIMAL(10, 2)) -- 10 digits, 2 decimal places +FROM products; + +-- Avoid +SELECT CAST(price AS DECIMAL(5, 2)) -- May overflow +``` + +**5. Document conversion logic:** +```sql +-- Good: Clear intent +SELECT + order_id, + -- Convert string price to numeric for calculations + CAST(price_str AS DECIMAL(10, 2)) AS price +FROM orders; +``` + +[Back to index](README.md) diff --git a/documentation/keywords.md b/documentation/sql/keywords.md similarity index 95% rename from documentation/keywords.md rename to documentation/sql/keywords.md index 9979e868..798ee8bb 100644 --- a/documentation/keywords.md +++ b/documentation/sql/keywords.md @@ -1,4 +1,4 @@ -[Back to index](./README.md) +[Back to index](README.md) # Keywords @@ -50,6 +50,7 @@ CONCAT POSITION REGEXP_LIKE REGEXP +MATCH ... AGAINST REPLACE REVERSE @@ -148,4 +149,4 @@ AND OR NOT -[Back to index](./README.md) +[Back to index](README.md) diff --git a/documentation/sql/operator_precedence.md b/documentation/sql/operator_precedence.md new file mode 100644 index 00000000..6132b34b --- /dev/null +++ b/documentation/sql/operator_precedence.md @@ -0,0 +1,804 @@ +[Back to index](README.md) + +## Operator Precedence + +This page lists operator precedence used by the parser and evaluator. Operators are evaluated in order from highest precedence (top) to lowest precedence (bottom). + +--- + +### Precedence Order + +| Precedence | Operator(s) | Category | Description | +|-----------------|----------------------------------|----------------------|----------------------------------------------------------| +| **1** (Highest) | `(...)` | Parentheses | Grouping and explicit precedence | +| **2** | `-`, `+`, `NOT` | Unary | Negation, unary plus, logical NOT | +| **3** | `*`, `/`, `%` | Multiplicative | Multiplication, division, modulo | +| **4** | `+`, `-` | Additive | Addition, subtraction | +| **5** | `<`, `<=`, `>`, `>=` | Comparison | Less than, less or equal, greater than, greater or equal | +| **6** | `=`, `!=`, `<>` | Equality | Equal, not equal | +| **7** | `BETWEEN`, `IN`, `LIKE`, `RLIKE` | Membership & Pattern | Range, set membership, pattern matching | +| **8** | `AND` | Logical AND | Logical conjunction | +| **9** (Lowest) | `OR` | Logical OR | Logical disjunction | + +--- + +### 1. Parentheses `(...)` + +**Highest precedence** - Used to explicitly control evaluation order. 
+ +**Examples:** +```sql +-- Without parentheses +SELECT 1 + 2 * 3 AS v; +-- Result: 7 (multiplication first: 1 + 6) + +-- With parentheses +SELECT (1 + 2) * 3 AS v; +-- Result: 9 (addition first: 3 * 3) + +-- Complex expression +SELECT 10 - 2 * 3 AS v1, + (10 - 2) * 3 AS v2; +-- v1 = 4 (10 - 6) +-- v2 = 24 (8 * 3) + +-- Multiple levels +SELECT ((5 + 3) * 2 - 4) / 2 AS result; +-- Result: 6 +-- Step 1: (5 + 3) = 8 +-- Step 2: 8 * 2 = 16 +-- Step 3: 16 - 4 = 12 +-- Step 4: 12 / 2 = 6 +``` + +**Best Practice:** +```sql +-- Use parentheses for clarity even when not required +SELECT price * (1 + tax_rate) AS total_price +FROM products; + +-- Better than ambiguous: +SELECT price * 1 + tax_rate AS total_price -- Unclear intent +FROM products; +``` + +--- + +### 2. Unary Operators: `-`, `+`, `NOT` + +**Second highest precedence** - Applied to single operands. + +**Examples:** + +**Unary Minus (Negation):** +```sql +-- Negate a value +SELECT -5 AS neg_value; +-- Result: -5 + +-- Negate column value +SELECT product_id, -price AS negative_price +FROM products; + +-- In calculations +SELECT 10 + -5 AS result; +-- Result: 5 + +-- With parentheses for clarity +SELECT 10 + (-5) AS result; +-- Result: 5 + +-- Double negation +SELECT -(-10) AS result; +-- Result: 10 +``` + +**Unary Plus:** +```sql +-- Explicit positive (rarely used) +SELECT +5 AS pos_value; +-- Result: 5 + +SELECT +price AS positive_price +FROM products; +``` + +**Logical NOT:** +```sql +-- Negate boolean expression +SELECT * FROM users +WHERE NOT is_active; +-- Same as: WHERE is_active = false + +-- NOT with comparison +SELECT * FROM products +WHERE NOT (price > 100); +-- Same as: WHERE price <= 100 + +-- NOT with IN +SELECT * FROM orders +WHERE NOT status IN ('cancelled', 'refunded'); +-- Same as: WHERE status NOT IN ('cancelled', 'refunded') + +-- Multiple NOT +SELECT * FROM users +WHERE NOT (NOT is_verified); +-- Same as: WHERE is_verified +``` + +--- + +### 3. Multiplicative: `*`, `/`, `%` + +**Third precedence** - Multiplication, division, and modulo operations. + +**Examples:** + +**Multiplication:** +```sql +-- Basic multiplication +SELECT 5 * 3 AS result; +-- Result: 15 + +-- In expressions +SELECT price * quantity AS total +FROM order_items; + +-- Multiple multiplications (left to right) +SELECT 2 * 3 * 4 AS result; +-- Result: 24 (evaluated as (2 * 3) * 4) +``` + +**Division:** +```sql +-- Basic division +SELECT 10 / 2 AS result; +-- Result: 5 + +-- Integer division +SELECT 10 / 3 AS result; +-- Result: 3 (truncated if both operands are integers) + +-- Float division +SELECT 10.0 / 3 AS result; +-- Result: 3.333... + +-- Avoid division by zero +SELECT + CASE + WHEN quantity != 0 THEN total / quantity + ELSE 0 + END AS avg_price +FROM orders; +``` + +**Modulo:** +```sql +-- Basic modulo +SELECT 10 % 3 AS remainder; +-- Result: 1 + +-- Even/odd check +SELECT + number, + CASE WHEN number % 2 = 0 THEN 'Even' ELSE 'Odd' END AS parity +FROM numbers; + +-- Cycling values +SELECT + day_number, + day_number % 7 AS day_of_week +FROM calendar; +``` + +**Mixed Operations:** +```sql +-- Multiplication and division (left to right) +SELECT 10 * 2 / 4 AS result; +-- Result: 5 (evaluated as (10 * 2) / 4 = 20 / 4) + +-- With modulo +SELECT 17 % 5 * 2 AS result; +-- Result: 4 (evaluated as (17 % 5) * 2 = 2 * 2) +``` + +--- + +### 4. Additive: `+`, `-` + +**Fourth precedence** - Addition and subtraction operations. 
+ +**Examples:** + +**Addition:** +```sql +-- Basic addition +SELECT 5 + 3 AS result; +-- Result: 8 + +-- Multiple additions +SELECT 10 + 20 + 30 AS result; +-- Result: 60 + +-- With columns +SELECT + base_price + tax + shipping AS total_cost +FROM orders; +``` + +**Subtraction:** +```sql +-- Basic subtraction +SELECT 10 - 3 AS result; +-- Result: 7 + +-- Multiple subtractions (left to right) +SELECT 100 - 20 - 10 AS result; +-- Result: 70 (evaluated as (100 - 20) - 10) + +-- Date arithmetic +SELECT + order_date, + DATE_SUB(order_date, INTERVAL 7 DAY) AS week_ago +FROM orders; +``` + +**Mixed with Multiplicative:** +```sql +-- Multiplication before addition +SELECT 1 + 2 * 3 AS result; +-- Result: 7 (evaluated as 1 + (2 * 3) = 1 + 6) + +-- Use parentheses to change order +SELECT (1 + 2) * 3 AS result; +-- Result: 9 + +-- Complex expression +SELECT 10 + 5 * 2 - 3 AS result; +-- Result: 17 (evaluated as 10 + (5 * 2) - 3 = 10 + 10 - 3) + +-- With parentheses +SELECT (10 + 5) * (2 - 3) AS result; +-- Result: -15 (evaluated as 15 * (-1)) +``` + +--- + +### 5. Comparison: `<`, `<=`, `>`, `>=` + +**Fifth precedence** - Relational comparisons. + +**Examples:** + +**Less Than / Greater Than:** +```sql +-- Basic comparisons +SELECT 5 < 10 AS result; +-- Result: true + +SELECT 5 > 10 AS result; +-- Result: false + +-- In WHERE clause +SELECT * FROM products +WHERE price < 100; + +SELECT * FROM users +WHERE age >= 18; +``` + +**With Arithmetic:** +```sql +-- Arithmetic evaluated first +SELECT * FROM products +WHERE price + tax > 100; +-- Evaluated as: (price + tax) > 100 + +SELECT * FROM orders +WHERE quantity * price < 1000; +-- Evaluated as: (quantity * price) < 1000 +``` + +**Multiple Comparisons:** +```sql +-- Chained comparisons require AND +SELECT * FROM products +WHERE price > 50 AND price < 100; + +-- NOT this (syntax error in most SQL): +-- WHERE 50 < price < 100 + +-- Use BETWEEN instead +SELECT * FROM products +WHERE price BETWEEN 50 AND 100; +``` + +--- + +### 6. Equality: `=`, `!=`, `<>` + +**Sixth precedence** - Equality and inequality checks. + +**Examples:** + +**Equality:** +```sql +-- Basic equality +SELECT 5 = 5 AS result; +-- Result: true + +-- In WHERE clause +SELECT * FROM users +WHERE status = 'active'; + +-- NULL handling +SELECT * FROM users +WHERE email = NULL; -- Always false! +-- Use: WHERE email IS NULL +``` + +**Inequality:** +```sql +-- Not equal (two forms) +SELECT 5 != 3 AS result; +-- Result: true + +SELECT 5 <> 3 AS result; +-- Result: true + +-- In WHERE clause +SELECT * FROM orders +WHERE status != 'cancelled'; + +SELECT * FROM products +WHERE category <> 'discontinued'; +``` + +**With Comparisons:** +```sql +-- Comparison before equality +SELECT * FROM products +WHERE price > 50 = true; +-- Evaluated as: (price > 50) = true + +-- More readable: +SELECT * FROM products +WHERE (price > 50) = true; + +-- Or simply: +SELECT * FROM products +WHERE price > 50; +``` + +--- + +### 7. Membership & Pattern: `BETWEEN`, `IN`, `LIKE`, `RLIKE` + +**Seventh precedence** - Range, set membership, and pattern matching. 
+ +**Examples:** + +**BETWEEN:** +```sql +-- Range check +SELECT * FROM products +WHERE price BETWEEN 50 AND 100; +-- Equivalent to: price >= 50 AND price <= 100 + +-- With OR (lower precedence) +SELECT * FROM products +WHERE price BETWEEN 50 AND 100 OR category = 'sale'; +-- Evaluated as: (price BETWEEN 50 AND 100) OR (category = 'sale') + +-- NOT BETWEEN +SELECT * FROM products +WHERE price NOT BETWEEN 50 AND 100; +-- Equivalent to: price < 50 OR price > 100 +``` + +**IN:** +```sql +-- Set membership +SELECT * FROM orders +WHERE status IN ('pending', 'processing', 'shipped'); + +-- With OR (lower precedence) +SELECT * FROM products +WHERE category IN ('electronics', 'computers') OR on_sale = true; +-- Evaluated as: (category IN (...)) OR (on_sale = true) + +-- NOT IN +SELECT * FROM users +WHERE country NOT IN ('US', 'CA', 'MX'); +``` + +**LIKE:** +```sql +-- Pattern matching +SELECT * FROM products +WHERE name LIKE '%phone%'; + +-- With OR +SELECT * FROM products +WHERE name LIKE '%phone%' OR name LIKE '%tablet%'; +-- Evaluated as: (name LIKE '%phone%') OR (name LIKE '%tablet%') + +-- NOT LIKE +SELECT * FROM products +WHERE name NOT LIKE '%discontinued%'; +``` + +**RLIKE (Regular Expression):** +```sql +-- Regex pattern matching +SELECT * FROM products +WHERE name RLIKE '^[A-Z].*Pro$'; + +-- With OR +SELECT * FROM products +WHERE name RLIKE 'iPhone|iPad' OR category = 'Apple'; +-- Evaluated as: (name RLIKE 'iPhone|iPad') OR (category = 'Apple') +``` + +**Mixed with AND:** +```sql +-- BETWEEN with AND (AND has lower precedence) +SELECT * FROM products +WHERE price BETWEEN 50 AND 100 AND category = 'electronics'; +-- Evaluated as: (price BETWEEN 50 AND 100) AND (category = 'electronics') +``` + +--- + +### 8. Logical AND + +**Eighth precedence** - Logical conjunction (both conditions must be true). + +**Examples:** + +**Basic AND:** +```sql +-- Both conditions must be true +SELECT * FROM users +WHERE is_active = true AND is_verified = true; + +-- Multiple AND conditions +SELECT * FROM products +WHERE price > 50 + AND price < 100 + AND in_stock = true; +``` + +**AND with OR (OR has lower precedence):** +```sql +-- AND evaluated before OR +SELECT * FROM products +WHERE category = 'electronics' AND price < 100 OR on_sale = true; +-- Evaluated as: ((category = 'electronics') AND (price < 100)) OR (on_sale = true) +-- Matches: (electronics under $100) OR (anything on sale) + +-- Use parentheses for different logic +SELECT * FROM products +WHERE category = 'electronics' AND (price < 100 OR on_sale = true); +-- Evaluated as: (category = 'electronics') AND ((price < 100) OR (on_sale = true)) +-- Matches: electronics that are (under $100 OR on sale) +``` + +**Complex AND Expressions:** +```sql +-- Multiple AND with comparisons +SELECT * FROM orders +WHERE status = 'shipped' + AND shipped_date >= '2025-01-01' + AND total_amount > 100 + AND customer_id IN (SELECT id FROM premium_customers); +``` + +--- + +### 9. Logical OR + +**Lowest precedence** - Logical disjunction (at least one condition must be true). 
+ +**Examples:** + +**Basic OR:** +```sql +-- At least one condition must be true +SELECT * FROM users +WHERE country = 'US' OR country = 'CA'; + +-- Multiple OR conditions +SELECT * FROM products +WHERE category = 'electronics' + OR category = 'computers' + OR category = 'phones'; +``` + +**OR with AND (AND has higher precedence):** +```sql +-- Example from specification +SELECT * FROM products +WHERE category = 'electronics' AND price < 100 OR on_sale = true; +-- Evaluated as: ((category = 'electronics') AND (price < 100)) OR (on_sale = true) + +-- Explicit parentheses for clarity +SELECT * FROM products +WHERE (category = 'electronics' AND price < 100) OR on_sale = true; + +-- Different logic with parentheses +SELECT * FROM products +WHERE category = 'electronics' AND (price < 100 OR on_sale = true); +``` + +**BETWEEN with OR:** +```sql +-- Example from specification +SELECT * FROM products +WHERE price BETWEEN 50 AND 100 OR category = 'sale'; +-- Evaluated as: (price BETWEEN 50 AND 100) OR (category = 'sale') + +-- Multiple BETWEEN with OR +SELECT * FROM products +WHERE price BETWEEN 10 AND 50 + OR price BETWEEN 200 AND 300 + OR category = 'clearance'; +``` + +**Complex OR Expressions:** +```sql +-- OR with multiple conditions +SELECT * FROM orders +WHERE status = 'cancelled' + OR status = 'refunded' + OR (status = 'pending' AND created_date < DATE_SUB(CURRENT_DATE, INTERVAL 30 DAY)); +``` + +--- + +### Practical Examples + +**Example 1: Arithmetic Precedence** +```sql +-- Without parentheses (follows precedence) +SELECT 1 + 2 * 3 AS v; +-- Result: 7 +-- Evaluation: 1 + (2 * 3) = 1 + 6 = 7 + +-- With parentheses (override precedence) +SELECT (1 + 2) * 3 AS v; +-- Result: 9 +-- Evaluation: (1 + 2) * 3 = 3 * 3 = 9 + +-- Complex calculation +SELECT 10 + 5 * 2 - 8 / 4 AS result; +-- Result: 18 +-- Evaluation: 10 + (5 * 2) - (8 / 4) = 10 + 10 - 2 = 18 +``` + +**Example 2: Comparison and Logical Operators** +```sql +-- BETWEEN with OR +SELECT * FROM products +WHERE price BETWEEN 50 AND 100 OR category = 'sale'; +-- Evaluated as: (price BETWEEN 50 AND 100) OR (category = 'sale') + +-- AND before OR +SELECT * FROM products +WHERE category = 'electronics' AND price < 100 OR on_sale = true; +-- Evaluated as: ((category = 'electronics') AND (price < 100)) OR (on_sale = true) + +-- Use parentheses for clarity +SELECT * FROM products +WHERE category = 'electronics' AND (price < 100 OR on_sale = true); +-- Evaluated as: (category = 'electronics') AND ((price < 100) OR (on_sale = true)) +``` + +**Example 3: NOT Operator** +```sql +-- NOT with high precedence +SELECT * FROM users +WHERE NOT is_active AND is_verified; +-- Evaluated as: (NOT is_active) AND (is_verified) + +-- Use parentheses for different logic +SELECT * FROM users +WHERE NOT (is_active AND is_verified); +-- Evaluated as: NOT ((is_active) AND (is_verified)) +``` + +**Example 4: Complex Business Logic** +```sql +-- Find premium or high-value orders +SELECT * FROM orders +WHERE customer_type = 'premium' + AND total_amount > 500 + OR total_amount > 1000 + AND status = 'completed'; +-- Evaluated as: +-- ((customer_type = 'premium') AND (total_amount > 500)) +-- OR +-- ((total_amount > 1000) AND (status = 'completed')) + +-- More readable with parentheses +SELECT * FROM orders +WHERE (customer_type = 'premium' AND total_amount > 500) + OR (total_amount > 1000 AND status = 'completed'); +``` + +**Example 5: IN with OR and AND** +```sql +-- IN with AND and OR +SELECT * FROM products +WHERE category IN ('electronics', 'computers') + AND price < 
500 + OR on_sale = true; +-- Evaluated as: +-- ((category IN ('electronics', 'computers')) AND (price < 500)) +-- OR +-- (on_sale = true) + +-- Clearer with parentheses +SELECT * FROM products +WHERE (category IN ('electronics', 'computers') AND price < 500) + OR on_sale = true; +``` + +--- + +### Best Practices + +**1. Use Parentheses for Clarity** +```sql +-- Good: Explicit and clear +SELECT * FROM orders +WHERE (status = 'pending' AND created_date < '2025-01-01') + OR (priority = 'high'); + +-- Avoid: Relies on precedence knowledge +SELECT * FROM orders +WHERE status = 'pending' AND created_date < '2025-01-01' + OR priority = 'high'; +``` + +**2. Group Related Conditions** +```sql +-- Good: Logical grouping +SELECT * FROM products +WHERE (category = 'electronics' AND price < 100) + OR (category = 'books' AND price < 20) + OR on_sale = true; + +-- Avoid: Flat structure +SELECT * FROM products +WHERE category = 'electronics' AND price < 100 + OR category = 'books' AND price < 20 + OR on_sale = true; +``` + +[//]: # (**3. Break Complex Expressions into CTEs**) + +[//]: # (```sql) + +[//]: # (-- Good: Readable with CTE) + +[//]: # (WITH affordable_electronics AS () + +[//]: # ( SELECT * FROM products) + +[//]: # ( WHERE category = 'electronics' AND price < 100) + +[//]: # (),) + +[//]: # (sale_items AS () + +[//]: # ( SELECT * FROM products) + +[//]: # ( WHERE on_sale = true) + +[//]: # ()) + +[//]: # (SELECT * FROM affordable_electronics) + +[//]: # (UNION) + +[//]: # (SELECT * FROM sale_items;) + +[//]: # () +[//]: # (-- Avoid: Complex single query) + +[//]: # (SELECT * FROM products) + +[//]: # (WHERE category = 'electronics' AND price < 100 OR on_sale = true;) + +[//]: # (```) + +**3. Document Complex Logic** +```sql +-- Good: Commented for clarity +SELECT * FROM orders +WHERE + -- High priority orders + (priority = 'high' AND status = 'pending') + -- OR old pending orders + OR (status = 'pending' AND created_date < DATE_SUB(CURRENT_DATE, INTERVAL 7 DAY)) + -- OR VIP customer orders + OR customer_id IN (SELECT id FROM vip_customers); +``` + +--- + +### Common Mistakes + +**Mistake 1: Forgetting AND before OR precedence** +```sql +-- Wrong interpretation +SELECT * FROM products +WHERE category = 'electronics' AND price < 100 OR on_sale = true; +-- Might think: electronics that are (under $100 OR on sale) +-- Actually means: (electronics under $100) OR (anything on sale) + +-- Correct with parentheses +SELECT * FROM products +WHERE category = 'electronics' AND (price < 100 OR on_sale = true); +``` + +**Mistake 2: Arithmetic order confusion** +```sql +-- Wrong +SELECT price + tax * quantity FROM orders; +-- Evaluated as: price + (tax * quantity) + +-- Correct +SELECT (price + tax) * quantity FROM orders; +``` + +**Mistake 3: NOT operator scope** +```sql +-- Wrong interpretation +SELECT * FROM users +WHERE NOT is_active AND is_verified; +-- Evaluated as: (NOT is_active) AND (is_verified) + +-- If you want: NOT (is_active AND is_verified) +SELECT * FROM users +WHERE NOT (is_active AND is_verified); +``` + +**Mistake 4: Multiple comparisons** +```sql +-- Wrong (syntax error) +SELECT * FROM products +WHERE 10 < price < 100; + +-- Correct +SELECT * FROM products +WHERE price > 10 AND price < 100; + +-- Or use BETWEEN +SELECT * FROM products +WHERE price BETWEEN 10 AND 100; +``` + +--- + +### Precedence Summary Table + +| Level | Operators | Associativity | Example | +|--------|----------------------------------|----------------|----------------------| +| 1 | `(...)` | N/A | `(a + b) * c` | +| 2 
| `-`, `+`, `NOT` | Right | `-a`, `NOT b` |
+| 3 | `*`, `/`, `%` | Left | `a * b / c` |
+| 4 | `+`, `-` | Left | `a + b - c` |
+| 5 | `<`, `<=`, `>`, `>=` | Left | `a < b` |
+| 6 | `=`, `!=`, `<>` | Left | `a = b` |
+| 7 | `BETWEEN`, `IN`, `LIKE`, `RLIKE` | N/A | `a BETWEEN 1 AND 10` |
+| 8 | `AND` | Left | `a AND b AND c` |
+| 9 | `OR` | Left | `a OR b OR c` |
+
+**Associativity:**
+- **Left**: Operators of the same precedence evaluate left-to-right: `a - b - c` = `(a - b) - c`
+- **Right**: Operators evaluate right-to-left: `-a` applies to `a` first
+- **N/A**: Not applicable for grouping or special operators
+
+[Back to index](README.md)
diff --git a/documentation/sql/operators.md b/documentation/sql/operators.md
new file mode 100644
index 00000000..90a545db
--- /dev/null
+++ b/documentation/sql/operators.md
@@ -0,0 +1,1930 @@
+[Back to index](README.md)
+
+# Operators (detailed)
+
+**Navigation:** [Query Structure](request_structure.md) · [Operator Precedence](operator_precedence.md) · [Keywords](keywords.md)
+
+This file provides a per-operator description and concrete SQL examples for each operator supported by the engine.
+
+---
+
+## Table of Contents
+
+1. [Math Operators](#math-operators)
+2. [Comparison Operators](#comparison-operators)
+3. [Logical Operators](#logical-operators)
+4. [Cast Operators](#cast-operators)
+
+---
+
+## Math Operators
+
+### Operator: `+`
+
+**Description:**
+Arithmetic addition.
+
+**Syntax:**
+```sql
+expr1 + expr2
+```
+
+**Inputs:**
+- `expr1`, `expr2` - Numeric expressions (`INT`, `DOUBLE`, `DECIMAL`, etc.)
+
+**Output:**
+- Numeric type (result type depends on operand types)
+
+**Examples:**
+
+**Basic Addition:**
+```sql
+-- Add two numbers
+SELECT 5 + 3 AS result;
+-- Result: 8
+
+-- Add column values
+SELECT salary + bonus AS total_comp FROM emp;
+-- Result example: if salary=50000 and bonus=10000 -> total_comp = 60000
+
+-- Multiple additions
+SELECT base_price + tax + shipping AS total_cost
+FROM orders;
+```
+
+**With Different Types:**
+```sql
+-- Integer addition
+SELECT 10 + 20 AS sum;
+-- Result: 30
+
+-- Float addition
+SELECT 10.5 + 20.3 AS sum;
+-- Result: 30.8
+
+-- Mixed types (INT + DOUBLE)
+SELECT 10 + 20.5 AS sum;
+-- Result: 30.5 (promoted to DOUBLE)
+```
+
+**NULL Handling:**
+```sql
+-- NULL propagation
+SELECT salary + NULL AS result FROM emp;
+-- Result: NULL
+
+-- Use COALESCE for default
+SELECT salary + COALESCE(bonus, 0) AS total FROM emp;
+```
+
+---
+
+### Operator: `-`
+
+**Description:**
+Arithmetic subtraction, or unary negation when applied to a single operand.
+
+**Syntax:**
+```sql
+-- Binary (subtraction)
+expr1 - expr2
+
+-- Unary (negation)
+-expr
+```
+
+**Inputs:**
+- `expr1`, `expr2` - Numeric expressions
+- `expr` - Numeric expression (for unary)
+
+**Output:**
+- Numeric type
+
+**Examples:**
+
+**Subtraction:**
+```sql
+-- Basic subtraction
+SELECT 10 - 3 AS result;
+-- Result: 7
+
+-- Column subtraction
+SELECT salary - tax AS net FROM emp;
+
+-- Multiple subtractions
+SELECT revenue - cost - overhead AS profit
+FROM financials;
+```
+
+**Unary Negation:**
+```sql
+-- Negate a value
+SELECT -balance AS negative_balance FROM accounts;
+
+-- Negate column
+SELECT -price AS negated_price FROM products;
+
+-- In expressions
+SELECT 100 + (-50) AS result;
+-- Result: 50
+```
+
+**Date Arithmetic:**
+```sql
+-- Date subtraction (days between)
+SELECT order_date - ship_date AS days_to_ship
+FROM orders;
+
+-- With INTERVAL
+SELECT order_date - INTERVAL 7 DAY AS week_ago
+FROM orders;
+```
+
+---
+
+### Operator: `*`
+
+**Description:**
+Multiplication.
+
+**Syntax:**
+```sql
+expr1 * expr2
+```
+
+**Inputs:**
+- `expr1`, `expr2` - Numeric expressions
+
+**Output:**
+- Numeric type
+
+**Examples:**
+
+**Basic Multiplication:**
+```sql
+-- Multiply two numbers
+SELECT 5 * 3 AS result;
+-- Result: 15
+
+-- Calculate revenue
+SELECT quantity * price AS revenue FROM sales;
+
+-- Multiple multiplications
+SELECT length * width * height AS volume
+FROM boxes;
+```
+
+**With Different Types:**
+```sql
+-- Integer multiplication
+SELECT 10 * 5 AS product;
+-- Result: 50
+
+-- Float multiplication
+SELECT 10.5 * 2.0 AS product;
+-- Result: 21.0
+
+-- Mixed types
+SELECT 10 * 2.5 AS product;
+-- Result: 25.0
+```
+
+**Practical Examples:**
+```sql
+-- Calculate total with tax
+SELECT price * (1 + tax_rate) AS total_price
+FROM products;
+
+-- Calculate discount
+SELECT price * (1 - discount_percent / 100) AS discounted_price
+FROM products;
+
+-- Area calculation
+SELECT width * height AS area FROM rectangles;
+```
+
+---
+
+### Operator: `/`
+
+**Description:**
+Division. Division by zero should be guarded (e.g. with `NULLIF`); the engine returns NULL for invalid arithmetic.
+
+**Syntax:**
+```sql
+expr1 / expr2
+```
+
+**Inputs:**
+- `expr1`, `expr2` - Numeric expressions
+
+**Output:**
+- Numeric type (NULL if division by zero)
+
+**Examples:**
+
+**Basic Division:**
+```sql
+-- Divide two numbers
+SELECT 10 / 2 AS result;
+-- Result: 5
+
+-- Calculate average
+SELECT total / NULLIF(count, 0) AS avg FROM statistics;
+
+-- Per-unit price
+SELECT total_price / quantity AS unit_price
+FROM order_items;
+```
+
+**Integer vs Float Division:**
+```sql
+-- Integer division (truncates)
+SELECT 10 / 3 AS result;
+-- Result: 3 (if both are integers)
+
+-- Float division
+SELECT 10.0 / 3 AS result;
+-- Result: 3.333...
+
+-- Force float division
+SELECT CAST(10 AS DOUBLE) / 3 AS result;
+-- Result: 3.333...
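+
+-- Equivalent sketch using the PostgreSQL-style cast operator
+-- (documented under Cast Operators below)
+SELECT 10::DOUBLE / 3 AS result;
+-- Result: 3.333...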
+``` + +**Division by Zero Protection:** +```sql +-- Using NULLIF (recommended) +SELECT total / NULLIF(count, 0) AS avg +FROM statistics; +-- Returns NULL if count = 0 + +-- Using CASE +SELECT + CASE + WHEN count != 0 THEN total / count + ELSE 0 + END AS avg +FROM statistics; + +-- Using COALESCE for default +SELECT COALESCE(total / NULLIF(count, 0), 0) AS avg +FROM statistics; +``` + +**Practical Examples:** +```sql +-- Calculate percentage +SELECT (passed / NULLIF(total, 0)) * 100 AS pass_rate +FROM exam_results; + +-- Average order value +SELECT + SUM(total) / NULLIF(COUNT(*), 0) AS avg_order_value +FROM orders; + +-- Split cost +SELECT total_cost / NULLIF(num_people, 0) AS cost_per_person +FROM expenses; +``` + +--- + +### Operator: `%` (MOD) + +**Description:** +Remainder/modulo operator. + +**Syntax:** +```sql +expr1 % expr2 +``` + +**Inputs:** +- `expr1`, `expr2` - Integer expressions + +**Output:** +- Integer (remainder of division) + +**Examples:** + +**Basic Modulo:** +```sql +-- Get remainder +SELECT 10 % 3 AS remainder; +-- Result: 1 + +-- Bucket users by ID +SELECT id % 10 AS bucket FROM users; + +-- Check if number is even +SELECT + number, + CASE WHEN number % 2 = 0 THEN 'Even' ELSE 'Odd' END AS parity +FROM numbers; +``` + +**Practical Examples:** +```sql +-- Distribute data across shards +SELECT + user_id, + user_id % 5 AS shard_id +FROM users; + +-- Find every Nth record +SELECT * FROM logs +WHERE log_id % 100 = 0; -- Every 100th record + +-- Cycle through values +SELECT + day_number, + day_number % 7 AS day_of_week +FROM calendar; + +-- Alternate row colors (even/odd) +SELECT + row_number, + CASE WHEN row_number % 2 = 0 THEN 'even-row' ELSE 'odd-row' END AS css_class +FROM data_table; +``` + +**With Negative Numbers:** +```sql +-- Modulo with negative numbers +SELECT -10 % 3 AS result; +-- Result: -1 (sign follows dividend) + +SELECT 10 % -3 AS result; +-- Result: 1 +``` + +--- + +## Comparison Operators + +### Operator: `=` + +**Description:** +Equality comparison. + +**Syntax:** +```sql +expr1 = expr2 +``` + +**Inputs:** +- `expr1`, `expr2` - Any comparable types + +**Return type:** +- `BOOLEAN` + +**Examples:** + +**Basic Equality:** +```sql +-- Compare values +SELECT 5 = 5 AS result; +-- Result: true + +-- Filter by department +SELECT * FROM emp WHERE department = 'IT'; + +-- Compare columns +SELECT * FROM orders +WHERE customer_id = shipping_customer_id; +``` + +**String Comparison:** +```sql +-- Case-sensitive string comparison +SELECT * FROM users WHERE username = 'john_doe'; + +-- Compare with column +SELECT * FROM products +WHERE category = 'Electronics'; +``` + +**Numeric Comparison:** +```sql +-- Integer equality +SELECT * FROM products WHERE stock_quantity = 0; + +-- Decimal equality +SELECT * FROM orders WHERE total_amount = 99.99; +``` + +**Date Comparison:** +```sql +-- Date equality +SELECT * FROM orders WHERE order_date = '2025-01-10'; + +-- Timestamp equality +SELECT * FROM events +WHERE event_timestamp = '2025-01-10 14:30:00'; +``` + +**NULL Handling:** +```sql +-- NULL comparison always returns NULL (not true or false) +SELECT * FROM emp WHERE manager = NULL; -- Returns no rows! + +-- Use IS NULL instead +SELECT * FROM emp WHERE manager IS NULL; +``` + +--- + +### Operator: `<>`, `!=` + +**Description:** +Inequality comparison (both synonyms supported). 
+ +**Syntax:** +```sql +expr1 <> expr2 +expr1 != expr2 +``` + +**Inputs:** +- `expr1`, `expr2` - Any comparable types + +**Return type:** +- `BOOLEAN` + +**Examples:** + +**Basic Inequality:** +```sql +-- Not equal +SELECT 5 <> 3 AS result; +-- Result: true + +SELECT 5 != 3 AS result; +-- Result: true + +-- Filter by status +SELECT * FROM emp WHERE status <> 'terminated'; +SELECT * FROM emp WHERE status != 'terminated'; +``` + +**String Inequality:** +```sql +-- Exclude specific values +SELECT * FROM products WHERE category != 'Discontinued'; + +-- Multiple exclusions (use NOT IN instead) +SELECT * FROM orders +WHERE status <> 'cancelled' + AND status <> 'refunded'; +``` + +**Numeric Inequality:** +```sql +-- Not equal to zero +SELECT * FROM products WHERE stock_quantity != 0; + +-- Exclude specific value +SELECT * FROM users WHERE age <> 18; +``` + +**NULL Handling:** +```sql +-- NULL inequality returns NULL (not true) +SELECT * FROM emp WHERE manager != NULL; -- Returns no rows! + +-- Use IS NOT NULL instead +SELECT * FROM emp WHERE manager IS NOT NULL; +``` + +--- + +### Operator: `<`, `<=`, `>`, `>=` + +**Description:** +Relational comparisons. + +**Syntax:** +```sql +expr1 < expr2 -- Less than +expr1 <= expr2 -- Less than or equal +expr1 > expr2 -- Greater than +expr1 >= expr2 -- Greater than or equal +``` + +**Inputs:** +- `expr1`, `expr2` - Comparable types (numeric, string, date, etc.) + +**Return type:** +- `BOOLEAN` + +**Examples:** + +**Numeric Comparisons:** +```sql +-- Greater than +SELECT * FROM products WHERE price > 100; + +-- Less than or equal +SELECT * FROM users WHERE age <= 18; + +-- Age range +SELECT * FROM emp WHERE age >= 21 AND age < 65; + +-- Between alternative +SELECT * FROM products +WHERE price >= 50 AND price <= 100; +``` + +**String Comparisons:** +```sql +-- Lexicographic comparison +SELECT * FROM users WHERE username > 'M'; -- Names starting with M-Z + +-- Alphabetical range +SELECT * FROM products +WHERE name >= 'A' AND name < 'D'; +``` + +**Date Comparisons:** +```sql +-- After specific date +SELECT * FROM orders WHERE order_date > '2025-01-01'; + +-- Before or on date +SELECT * FROM events WHERE event_date <= CURRENT_DATE; + +-- Date range +SELECT * FROM logs +WHERE log_date >= '2025-01-01' + AND log_date < '2025-02-01'; +``` + +**Timestamp Comparisons:** +```sql +-- Recent records +SELECT * FROM activities +WHERE created_at >= DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 1 HOUR); + +-- Within time range +SELECT * FROM events +WHERE event_time >= '2025-01-10 09:00:00' + AND event_time <= '2025-01-10 17:00:00'; +``` + +--- + +### Operator: `IN` + +**Description:** +Membership in a set of literal or numeric values, or results of subquery (subquery support depends on implementation). + +**Syntax:** +```sql +expr IN (value1, value2, ...) 
+expr IN (subquery) +``` + +**Inputs:** +- `expr` - Expression to test +- `value1, value2, ...` - List of values +- `subquery` - Subquery returning single column + +**Return type:** +- `BOOLEAN` + +**Examples:** + +**Basic IN with Literals:** +```sql +-- String values +SELECT * FROM emp WHERE department IN ('Sales', 'IT', 'HR'); + +-- Numeric values +SELECT * FROM emp WHERE status IN (1, 2); + +-- Single value (equivalent to =) +SELECT * FROM products WHERE category IN ('Electronics'); +``` + +**Multiple Value Types:** +```sql +-- Integer list +SELECT * FROM orders WHERE order_id IN (100, 101, 102, 103); + +-- String list +SELECT * FROM users +WHERE country IN ('US', 'CA', 'MX', 'UK'); + +-- Date list +SELECT * FROM events +WHERE event_date IN ('2025-01-01', '2025-01-15', '2025-01-31'); +``` + +**With Subquery:** +```sql +-- Subquery returning IDs +SELECT * FROM orders +WHERE customer_id IN ( + SELECT id FROM customers WHERE status = 'premium' +); + +-- Nested subquery +SELECT * FROM products +WHERE category_id IN ( + SELECT id FROM categories WHERE active = true +); +``` + +**Empty List:** +```sql +-- Empty IN list returns false +SELECT * FROM products WHERE id IN (); +-- Returns no rows +``` + +**NULL Handling:** +```sql +-- NULL in list +SELECT * FROM users WHERE status IN ('active', NULL); +-- NULL is ignored in the list + +-- Column with NULL +SELECT * FROM users WHERE email IN ('test@example.com'); +-- Rows with NULL email are not matched +``` + +--- + +### Operator: `NOT IN` + +**Description:** +Negated membership. + +**Syntax:** +```sql +expr NOT IN (value1, value2, ...) +expr NOT IN (subquery) +``` + +**Inputs:** +- `expr` - Expression to test +- `value1, value2, ...` - List of values to exclude +- `subquery` - Subquery returning single column + +**Return type:** +- `BOOLEAN` + +**Examples:** + +**Basic NOT IN:** +```sql +-- Exclude departments +SELECT * FROM emp WHERE department NOT IN ('HR', 'Legal'); + +-- Exclude statuses +SELECT * FROM orders WHERE status NOT IN ('cancelled', 'refunded'); + +-- Exclude IDs +SELECT * FROM products WHERE product_id NOT IN (1, 2, 3); +``` + +**With Subquery:** +```sql +-- Exclude customers who have orders +SELECT * FROM customers +WHERE id NOT IN ( + SELECT DISTINCT customer_id FROM orders +); + +-- Exclude inactive categories +SELECT * FROM products +WHERE category_id NOT IN ( + SELECT id FROM categories WHERE active = false +); +``` + +**NULL Handling (Important!):** +```sql +-- NOT IN with NULL in list returns NULL (not true!) +SELECT * FROM users WHERE id NOT IN (1, 2, NULL); +-- Returns no rows because comparison with NULL is NULL + +-- Safe alternative: filter NULLs in subquery +SELECT * FROM customers +WHERE id NOT IN ( + SELECT customer_id FROM orders WHERE customer_id IS NOT NULL +); + +-- Or use NOT EXISTS +SELECT * FROM customers c +WHERE NOT EXISTS ( + SELECT 1 FROM orders o WHERE o.customer_id = c.id +); +``` + +--- + +### Operator: `BETWEEN ... AND ...` + +**Description:** +Checks if an expression lies between two boundaries (inclusive). +- For numeric expressions, `BETWEEN` works as standard SQL. +- For distance expressions (`ST_DISTANCE`), it supports units (`m`, `km`, `mi`, etc.). 
+
+**Syntax:**
+```sql
+expr BETWEEN lower_bound AND upper_bound
+```
+
+**Inputs:**
+- `expr` - Expression to test
+- `lower_bound`, `upper_bound` - Boundary values (inclusive)
+
+**Return type:**
+- `BOOLEAN`
+
+**Examples:**
+
+**Numeric BETWEEN:**
+```sql
+-- Age range
+SELECT age FROM users WHERE age BETWEEN 18 AND 30;
+-- Equivalent to: age >= 18 AND age <= 30
+
+-- Price range
+SELECT * FROM products WHERE price BETWEEN 50 AND 100;
+
+-- Quantity range
+SELECT * FROM inventory WHERE stock_quantity BETWEEN 10 AND 100;
+```
+
+**Temporal BETWEEN:**
+```sql
+-- Date range
+SELECT * FROM orders
+WHERE order_date BETWEEN '2025-01-01' AND '2025-01-31';
+
+-- With date functions
+SELECT * FROM users
+WHERE createdAt BETWEEN CURRENT_DATE - INTERVAL 1 MONTH AND CURRENT_DATE;
+
+-- Complex temporal range
+SELECT * FROM users
+WHERE createdAt BETWEEN CURRENT_DATE - INTERVAL 1 MONTH AND CURRENT_DATE
+  AND lastUpdated BETWEEN LAST_DAY('2025-09-11'::DATE) AND DATE_TRUNC(CURRENT_TIMESTAMP, DAY);
+
+-- Timestamp range
+SELECT * FROM events
+WHERE event_timestamp BETWEEN '2025-01-10 00:00:00' AND '2025-01-10 23:59:59';
+```
+
+**Distance BETWEEN (Geospatial):**
+
+**Using meters (default):**
+```sql
+-- Distance in meters
+SELECT id FROM locations
+WHERE ST_DISTANCE(POINT(-70.0, 40.0), toLocation) BETWEEN 4000 AND 5000;
+-- Finds locations between 4km and 5km away
+```
+
+**With explicit units:**
+```sql
+-- Distance with km units (the same 4-5 km range as above)
+SELECT id FROM locations
+WHERE ST_DISTANCE(POINT(-70.0, 40.0), toLocation) BETWEEN 4 km AND 5 km;
+
+-- Distance with miles
+SELECT id FROM locations
+WHERE ST_DISTANCE(POINT(-70.0, 40.0), toLocation) BETWEEN 2.5 mi AND 3.1 mi;
+```
+
+**Elasticsearch Optimization:**
+> 👉 In Elasticsearch translation, distance BETWEEN queries are optimized into a combination of:
+> - A **script filter** for the lower bound
+> - A `geo_distance` **query** for the upper bound (native ES optimization)
+
+**String BETWEEN:**
+```sql
+-- Lexicographic range
+SELECT * FROM products WHERE name BETWEEN 'A' AND 'D';
+-- Names starting with A, B, or C
+
+-- Date strings (if stored as strings)
+SELECT * FROM logs WHERE log_date BETWEEN '2025-01' AND '2025-03';
+```
+
+**NOT BETWEEN:**
+```sql
+-- Outside range
+SELECT * FROM products WHERE price NOT BETWEEN 50 AND 100;
+-- Equivalent to: price < 50 OR price > 100
+
+-- Exclude date range
+SELECT * FROM orders
+WHERE order_date NOT BETWEEN '2024-12-20' AND '2025-01-05';
+```
+
+**NULL Handling:**
+```sql
+-- NULL expression returns NULL (not true or false)
+SELECT * FROM products WHERE NULL BETWEEN 10 AND 20;
+-- Returns no rows
+
+-- Column with NULL
+SELECT * FROM products WHERE price BETWEEN 10 AND 20;
+-- Rows with NULL price are excluded
+```
+
+---
+
+### Operator: `IS NULL`
+
+**Description:**
+Null check predicate.
+ +**Syntax:** +```sql +expr IS NULL +``` + +**Inputs:** +- `expr` - Expression to test for NULL + +**Return type:** +- `BOOLEAN` + +**Examples:** + +**Basic NULL Check:** +```sql +-- Find rows with NULL manager +SELECT * FROM emp WHERE manager IS NULL; + +-- Find products without description +SELECT * FROM products WHERE description IS NULL; + +-- Find users without email +SELECT * FROM users WHERE email IS NULL; +``` + +**Multiple NULL Checks:** +```sql +-- Check multiple columns +SELECT * FROM contacts +WHERE phone IS NULL AND email IS NULL; + +-- Either column is NULL +SELECT * FROM users +WHERE first_name IS NULL OR last_name IS NULL; +``` + +**In SELECT:** +```sql +-- Flag NULL values +SELECT + name, + email, + CASE WHEN email IS NULL THEN 'No Email' ELSE 'Has Email' END AS email_status +FROM users; + +-- Count NULLs +SELECT + COUNT(*) AS total_rows, + COUNT(email) AS rows_with_email, + SUM(CASE WHEN email IS NULL THEN 1 ELSE 0 END) AS rows_without_email +FROM users; +``` + +**With COALESCE:** +```sql +-- Provide default for NULL +SELECT + name, + COALESCE(email, 'no-email@example.com') AS email +FROM users +WHERE email IS NULL; +``` + +--- + +### Operator: `IS NOT NULL` + +**Description:** +Negated null check. + +**Syntax:** +```sql +expr IS NOT NULL +``` + +**Inputs:** +- `expr` - Expression to test for non-NULL + +**Return type:** +- `BOOLEAN` + +**Examples:** + +**Basic NOT NULL Check:** +```sql +-- Find rows with manager assigned +SELECT * FROM emp WHERE manager IS NOT NULL; + +-- Find products with description +SELECT * FROM products WHERE description IS NOT NULL; + +-- Find users with email +SELECT * FROM users WHERE email IS NOT NULL; +``` + +**Multiple NOT NULL Checks:** +```sql +-- Both columns must have values +SELECT * FROM contacts +WHERE phone IS NOT NULL AND email IS NOT NULL; + +-- At least one column has value +SELECT * FROM users +WHERE first_name IS NOT NULL OR last_name IS NOT NULL; +``` + +**Data Quality Checks:** +```sql +-- Complete records only +SELECT * FROM orders +WHERE customer_id IS NOT NULL + AND order_date IS NOT NULL + AND total_amount IS NOT NULL; + +-- Count complete records +SELECT + COUNT(*) AS total, + COUNT(CASE WHEN email IS NOT NULL AND phone IS NOT NULL THEN 1 END) AS complete_contacts +FROM users; +``` + +**In Aggregations:** +```sql +-- Aggregate only non-NULL values +SELECT + category, + COUNT(*) AS total_products, + COUNT(CASE WHEN price IS NOT NULL THEN 1 END) AS products_with_price +FROM products +GROUP BY category; +``` + +--- + +### Operator: `LIKE` + +**Description:** +Pattern match using `%` and `_`. +- `%` matches zero or more characters (converted to `.*` in regex) +- `_` matches exactly one character (converted to `.` in regex) + +**Syntax:** +```sql +expr LIKE pattern +``` + +**Inputs:** +- `expr` - String expression to test +- `pattern` - Pattern string with `%` and `_` wildcards + +**Return type:** +- `BOOLEAN` + +**Examples:** + +**Basic LIKE Patterns:** +```sql +-- Starts with +SELECT * FROM emp WHERE name LIKE 'Jo%'; +-- Matches: 'John', 'Joe', 'Joseph', etc. + +-- Ends with +SELECT * FROM products WHERE name LIKE '%phone'; +-- Matches: 'iPhone', 'smartphone', 'telephone', etc. + +-- Contains +SELECT * FROM articles WHERE title LIKE '%tutorial%'; +-- Matches any title containing 'tutorial' + +-- Exact length with _ +SELECT * FROM codes WHERE code LIKE '___'; +-- Matches exactly 3 characters: 'ABC', '123', etc. 
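+
+-- Combined wildcards (illustrative sketch)
+SELECT * FROM emp WHERE name LIKE 'J_n%';
+-- Matches: 'Jon', 'Jane', 'Jonathan', etc. ('J', any single character, 'n', then anything)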
+```
+
+**Complex Patterns:**
+```sql
+-- Starts with and ends with
+SELECT * FROM products WHERE name LIKE 'iPhone%Pro';
+-- Matches: 'iPhone 14 Pro', 'iPhone 15 Pro', etc. (the name must end with 'Pro')
+
+-- Multiple wildcards
+SELECT * FROM emails WHERE address LIKE '%@%.com';
+-- Matches emails ending with .com
+
+-- Single character wildcard
+SELECT * FROM users WHERE username LIKE 'user_0_';
+-- Matches: 'user_01', 'user_02', ..., 'user_09' (the '0' is literal; each '_' matches exactly one character)
+
+-- Pattern with specific positions
+SELECT * FROM phone_numbers WHERE number LIKE '555-____';
+-- Matches: '555-1234', '555-5678', etc.
+```
+
+**Case Sensitivity:**
+```sql
+-- Case-insensitive (depends on collation)
+SELECT * FROM users WHERE name LIKE 'john%';
+-- May or may not match 'JOHN', 'John', 'john'
+
+-- Force case-insensitive with LOWER
+SELECT * FROM users WHERE LOWER(name) LIKE LOWER('john%');
+-- Matches all case variations
+```
+
+**NOT LIKE:**
+```sql
+-- Exclude pattern
+SELECT * FROM products WHERE name NOT LIKE '%discontinued%';
+
+-- Exclude multiple patterns
+SELECT * FROM articles
+WHERE title NOT LIKE '%draft%'
+  AND title NOT LIKE '%temp%';
+```
+
+**Escaping Special Characters:**
+```sql
+-- Literal % or _ (if supported)
+SELECT * FROM products WHERE name LIKE '100\% cotton' ESCAPE '\';
+-- Matches: '100% cotton'
+
+-- Literal underscore
+SELECT * FROM codes WHERE code LIKE 'CODE\_123' ESCAPE '\';
+-- Matches: 'CODE_123'
+```
+
+**Performance Note:**
+```sql
+-- Leading wildcard prevents index usage
+SELECT * FROM products WHERE name LIKE '%phone'; -- Slow
+
+-- Prefix search can use index
+SELECT * FROM products WHERE name LIKE 'phone%'; -- Fast
+
+-- Consider full-text search for complex patterns
+-- (MySQL-style MATCH ... AGAINST shown; support depends on the engine)
+SELECT * FROM products WHERE MATCH(name) AGAINST ('phone');
+```
+
+---
+
+### Operator: `RLIKE`
+
+**Description:**
+Regular-expression match (Java regex semantics).
+ +**Syntax:** +```sql +expr RLIKE pattern +``` + +**Inputs:** +- `expr` - String expression to test +- `pattern` - Regular expression pattern (Java regex syntax) + +**Return type:** +- `BOOLEAN` + +**Examples:** + +**Basic Regex Patterns:** +```sql +-- Email validation +SELECT * FROM users WHERE email RLIKE '.*@example\\.com$'; +-- Matches emails ending with @example.com + +-- Phone number pattern +SELECT * FROM contacts WHERE phone RLIKE '^\\d{3}-\\d{3}-\\d{4}$'; +-- Matches: '555-123-4567' + +-- Starts with pattern +SELECT * FROM products WHERE name RLIKE '^iPhone'; +-- Matches names starting with 'iPhone' + +-- Ends with pattern +SELECT * FROM files WHERE filename RLIKE '\\.(jpg|png|gif)$'; +-- Matches image files +``` + +**Character Classes:** +```sql +-- Alphanumeric +SELECT * FROM codes WHERE code RLIKE '^[A-Z0-9]+$'; +-- Matches uppercase letters and numbers only + +-- Digits only +SELECT * FROM ids WHERE id RLIKE '^\\d+$'; +-- Matches numeric IDs + +-- Letters only +SELECT * FROM names WHERE name RLIKE '^[a-zA-Z]+$'; +-- Matches alphabetic names only +``` + +**Quantifiers:** +```sql +-- Exact count +SELECT * FROM zipcodes WHERE code RLIKE '^\\d{5}$'; +-- Matches exactly 5 digits: '12345' + +-- Range +SELECT * FROM passwords WHERE password RLIKE '^.{8,20}$'; +-- Matches 8 to 20 characters + +-- One or more +SELECT * FROM tags WHERE tag RLIKE '^#[a-z]+$'; +-- Matches hashtags: '#tag', '#example' + +-- Zero or more +SELECT * FROM urls WHERE url RLIKE '^https?://.*'; +-- Matches http:// or https:// URLs +``` + +**Grouping and Alternation:** +```sql +-- Multiple options +SELECT * FROM products WHERE name RLIKE 'iPhone|iPad|iPod'; +-- Matches any Apple i-device + +-- Grouped patterns +SELECT * FROM users WHERE email RLIKE '^(admin|support|info)@.*'; +-- Matches emails starting with admin@, support@, or info@ + +-- Complex grouping +SELECT * FROM codes WHERE code RLIKE '^(US|CA|MX)-[0-9]{4}$'; +-- Matches: 'US-1234', 'CA-5678', 'MX-9012' +``` + +**Anchors:** +```sql +-- Start of string +SELECT * FROM usernames WHERE username RLIKE '^admin'; +-- Matches usernames starting with 'admin' + +-- End of string +SELECT * FROM emails WHERE email RLIKE '@company\\.com$'; +-- Matches emails ending with @company.com + +-- Whole string match +SELECT * FROM codes WHERE code RLIKE '^[A-Z]{3}-[0-9]{4}$'; +-- Matches exactly: 'ABC-1234' +``` + +**Advanced Patterns:** +```sql +-- URL validation +SELECT * FROM links +WHERE url RLIKE '^https?://[a-zA-Z0-9.-]+\\.[a-z]{2,}(/.*)?$'; + +-- IPv4 address +SELECT * FROM servers +WHERE ip_address RLIKE '^([0-9]{1,3}\\.){3}[0-9]{1,3}$'; + +-- Credit card (basic pattern) +SELECT * FROM payments +WHERE card_number RLIKE '^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}$'; + +-- Date format (YYYY-MM-DD) +SELECT * FROM records +WHERE date_str RLIKE '^[0-9]{4}-[0-9]{2}-[0-9]{2}$'; +``` + +**NOT RLIKE:** +```sql +-- Exclude pattern +SELECT * FROM users WHERE email NOT RLIKE '.*@spam\\.com$'; + +-- Exclude invalid formats +SELECT * FROM phone_numbers +WHERE number NOT RLIKE '^[0-9-]+$'; +``` + +**Case Insensitive:** +```sql +-- Use (?i) flag for case-insensitive +SELECT * FROM products WHERE name RLIKE '(?i)^iphone'; +-- Matches: 'iPhone', 'IPHONE', 'iphone', etc. +``` + +**Comparison with LIKE:** +```sql +-- LIKE (simpler, limited wildcards) +SELECT * FROM products WHERE name LIKE 'iPhone%'; + +-- RLIKE (powerful, full regex) +SELECT * FROM products WHERE name RLIKE '^iPhone (1[0-5]|[0-9]) (Pro|Max).*'; +-- Matches: 'iPhone 14 Pro', 'iPhone 15 Pro Max', etc. 
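+
+-- Per the LIKE description above, '%' maps to '.*' and '_' maps to '.',
+-- so the LIKE prefix query can be written as (equivalent sketch):
+SELECT * FROM products WHERE name RLIKE '^iPhone.*';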
+``` + +--- + +## Logical Operators + +### Operator: `AND` + +**Description:** +Logical conjunction. Returns true only if both operands are true. + +**Syntax:** +```sql +condition1 AND condition2 +``` + +**Inputs:** +- `condition1`, `condition2` - Boolean expressions + +**Return type:** +- `BOOLEAN` + +**Truth Table:** +| A | B | A AND B | +|---|---|---------| +| true | true | true | +| true | false | false | +| false | true | false | +| false | false | false | +| true | NULL | NULL | +| false | NULL | false | +| NULL | true | NULL | +| NULL | false | false | +| NULL | NULL | NULL | + +**Examples:** + +**Basic AND:** +```sql +-- Both conditions must be true +SELECT * FROM emp WHERE dept = 'IT' AND salary > 50000; + +-- Multiple AND conditions +SELECT * FROM products +WHERE category = 'Electronics' + AND price > 100 + AND in_stock = true; +``` + +**Combining Different Comparisons:** +```sql +-- Range with AND +SELECT * FROM users +WHERE age >= 18 AND age <= 65; + +-- Multiple field checks +SELECT * FROM orders +WHERE status = 'shipped' + AND total_amount > 100 + AND customer_id IS NOT NULL; +``` + +**AND with Complex Expressions:** +```sql +-- With calculations +SELECT * FROM employees +WHERE salary + bonus > 100000 + AND department IN ('Sales', 'IT'); + +-- With pattern matching +SELECT * FROM products +WHERE name LIKE '%Pro%' + AND price BETWEEN 500 AND 2000; +``` + +**NULL Handling:** +```sql +-- NULL in AND expression +SELECT * FROM users +WHERE is_active = true AND email IS NOT NULL; +-- Only returns rows where both conditions are true + +-- NULL propagation +SELECT * FROM products +WHERE price > 100 AND discount IS NULL; +-- Returns products with price > 100 and no discount +``` + +**Chaining Multiple AND:** +```sql +-- Long AND chain +SELECT * FROM orders +WHERE status = 'completed' + AND payment_status = 'paid' + AND shipping_status = 'delivered' + AND customer_rating >= 4 + AND order_date >= '2025-01-01'; +``` + +**Performance Tip:** +```sql +-- Put most selective conditions first +SELECT * FROM large_table +WHERE rare_condition = true -- Filters most rows + AND common_condition = 'value' -- Filters fewer rows + AND another_condition > 100; +``` + +--- + +### Operator: `OR` + +**Description:** +Logical disjunction. Returns true if at least one operand is true. 
+ +**Syntax:** +```sql +condition1 OR condition2 +``` + +**Inputs:** +- `condition1`, `condition2` - Boolean expressions + +**Return type:** +- `BOOLEAN` + +**Truth Table:** +| A | B | A OR B | +|---|---|--------| +| true | true | true | +| true | false | true | +| false | true | true | +| false | false | false | +| true | NULL | true | +| false | NULL | NULL | +| NULL | true | true | +| NULL | false | NULL | +| NULL | NULL | NULL | + +**Examples:** + +**Basic OR:** +```sql +-- At least one condition must be true +SELECT * FROM emp WHERE dept = 'IT' OR dept = 'Sales'; + +-- Multiple OR conditions +SELECT * FROM products +WHERE category = 'Electronics' + OR category = 'Computers' + OR category = 'Phones'; +``` + +**OR with Different Types:** +```sql +-- Numeric OR +SELECT * FROM users +WHERE age < 18 OR age > 65; + +-- String OR +SELECT * FROM orders +WHERE status = 'pending' OR status = 'processing'; + +-- Mixed conditions +SELECT * FROM products +WHERE price < 10 OR on_sale = true; +``` + +**OR with AND (Precedence):** +```sql +-- AND has higher precedence than OR +SELECT * FROM products +WHERE category = 'Electronics' AND price < 100 OR on_sale = true; +-- Evaluated as: ((category = 'Electronics') AND (price < 100)) OR (on_sale = true) + +-- Use parentheses for clarity +SELECT * FROM products +WHERE category = 'Electronics' AND (price < 100 OR on_sale = true); +-- Evaluated as: (category = 'Electronics') AND ((price < 100) OR (on_sale = true)) +``` + +**Multiple OR Conditions:** +```sql +-- Status check +SELECT * FROM orders +WHERE status = 'cancelled' + OR status = 'refunded' + OR status = 'failed'; + +-- Better: Use IN instead +SELECT * FROM orders +WHERE status IN ('cancelled', 'refunded', 'failed'); +``` + +**OR with NULL:** +```sql +-- NULL in OR expression +SELECT * FROM users +WHERE email IS NULL OR phone IS NULL; +-- Returns users missing email OR phone (or both) + +-- TRUE OR NULL = TRUE +SELECT * FROM products +WHERE in_stock = true OR discount IS NULL; +-- Returns in-stock products regardless of discount +``` + +**Complex OR Expressions:** +```sql +-- Combining multiple conditions +SELECT * FROM employees +WHERE (department = 'Sales' AND salary > 80000) + OR (department = 'IT' AND years_experience > 5) + OR (is_manager = true); +``` + +**Performance Consideration:** +```sql +-- OR can prevent index usage +SELECT * FROM users +WHERE first_name = 'John' OR last_name = 'Doe'; +-- May require full table scan + +-- Alternative: UNION (if indexes exist) +SELECT * FROM users WHERE first_name = 'John' +UNION +SELECT * FROM users WHERE last_name = 'Doe'; +``` + +--- + +### Operator: `NOT` + +**Description:** +Logical negation. Inverts the boolean value. 
+ +**Syntax:** +```sql +NOT condition +``` + +**Inputs:** +- `condition` - Boolean expression + +**Return type:** +- `BOOLEAN` + +**Truth Table:** +| A | NOT A | +|---|-------| +| true | false | +| false | true | +| NULL | NULL | + +**Examples:** + +**Basic NOT:** +```sql +-- Negate boolean column +SELECT * FROM emp WHERE NOT active; +-- Same as: WHERE active = false + +-- Negate comparison +SELECT * FROM products WHERE NOT (price > 100); +-- Same as: WHERE price <= 100 +``` + +**NOT with IN:** +```sql +-- Exclude values +SELECT * FROM orders WHERE NOT status IN ('cancelled', 'refunded'); +-- Same as: WHERE status NOT IN ('cancelled', 'refunded') + +-- Explicit NOT +SELECT * FROM products WHERE NOT (category IN ('Discontinued', 'Obsolete')); +``` + +**NOT with BETWEEN:** +```sql +-- Outside range +SELECT * FROM products WHERE NOT (price BETWEEN 50 AND 100); +-- Same as: WHERE price NOT BETWEEN 50 AND 100 +-- Same as: WHERE price < 50 OR price > 100 +``` + +**NOT with LIKE:** +```sql +-- Exclude pattern +SELECT * FROM users WHERE NOT (email LIKE '%@spam.com'); +-- Same as: WHERE email NOT LIKE '%@spam.com' + +-- Multiple NOT LIKE +SELECT * FROM products +WHERE NOT (name LIKE '%discontinued%') + AND NOT (name LIKE '%obsolete%'); +``` + +**NOT with IS NULL:** +```sql +-- Has value +SELECT * FROM emp WHERE NOT (manager IS NULL); +-- Same as: WHERE manager IS NOT NULL + +-- Both fields have values +SELECT * FROM contacts +WHERE NOT (email IS NULL OR phone IS NULL); +-- Same as: WHERE email IS NOT NULL AND phone IS NOT NULL +``` + +**NOT with AND/OR:** +```sql +-- De Morgan's Law: NOT (A AND B) = (NOT A) OR (NOT B) +SELECT * FROM users +WHERE NOT (is_active = true AND is_verified = true); +-- Same as: WHERE is_active = false OR is_verified = false + +-- De Morgan's Law: NOT (A OR B) = (NOT A) AND (NOT B) +SELECT * FROM products +WHERE NOT (category = 'Discontinued' OR in_stock = false); +-- Same as: WHERE category != 'Discontinued' AND in_stock = true +``` + +**Double Negation:** +```sql +-- NOT NOT = identity +SELECT * FROM users WHERE NOT (NOT is_active); +-- Same as: WHERE is_active + +-- Can be confusing, avoid in practice +SELECT * FROM products WHERE NOT (NOT (price > 100)); +-- Same as: WHERE price > 100 +``` + +**NOT with Complex Expressions:** +```sql +-- Negate entire condition +SELECT * FROM orders +WHERE NOT ( + status = 'completed' + AND payment_status = 'paid' + AND total_amount > 1000 +); +-- Returns orders that don't meet ALL three conditions + +-- Negate with parentheses +SELECT * FROM employees +WHERE NOT (department = 'Sales' AND salary < 50000); +-- Returns non-Sales employees OR Sales employees earning >= 50000 +``` + +**NOT with EXISTS:** +```sql +-- Find customers without orders +SELECT * FROM customers c +WHERE NOT EXISTS ( + SELECT 1 FROM orders o WHERE o.customer_id = c.id +); +``` + +**Practical Examples:** +```sql +-- Exclude inactive and unverified users +SELECT * FROM users +WHERE NOT (is_active = false OR is_verified = false); +-- Same as: WHERE is_active = true AND is_verified = true + +-- Products not in specific categories +SELECT * FROM products +WHERE NOT (category IN ('Discontinued', 'Clearance', 'Obsolete')); + +-- Orders not in terminal states +SELECT * FROM orders +WHERE NOT (status IN ('completed', 'cancelled', 'refunded')); + +-- Users without complete profile +SELECT * FROM users +WHERE NOT ( + email IS NOT NULL + AND phone IS NOT NULL + AND address IS NOT NULL +); +``` + +**NULL Handling:** +```sql +-- NOT NULL = NULL (not false!) 
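+-- Example: if discount is NULL, NOT (discount > 0) evaluates to NULL, so the row is filtered out
+
+-- NOT around IS NULL is safe, since IS NULL itself always returns true or false: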
+SELECT * FROM products WHERE NOT (discount IS NULL); +-- Same as: WHERE discount IS NOT NULL + +-- NOT with NULL comparison +SELECT * FROM users WHERE NOT (status = NULL); +-- Always returns no rows (NULL comparison is always NULL) +-- Use: WHERE status IS NOT NULL +``` + +**Best Practices:** +```sql +-- Good: Use positive logic when possible +SELECT * FROM users WHERE is_active = true; + +-- Avoid: Double negatives +SELECT * FROM users WHERE NOT (is_active = false); + +-- Good: Use specific operators +SELECT * FROM products WHERE category NOT IN ('A', 'B'); + +-- Avoid: NOT with IN +SELECT * FROM products WHERE NOT (category IN ('A', 'B')); +``` + +--- + +### Combining Logical Operators + +**AND + OR + NOT:** +```sql +-- Complex business logic +SELECT * FROM orders +WHERE ( + (status = 'pending' AND created_date < DATE_SUB(CURRENT_DATE, INTERVAL 7 DAY)) + OR (status = 'processing' AND priority = 'high') + ) + AND NOT (customer_type = 'blocked') + AND total_amount > 0; +``` + +**Precedence Reminder:** +1. `NOT` (highest) +2. `AND` +3. `OR` (lowest) + +```sql +-- Without parentheses (follows precedence) +SELECT * FROM products +WHERE NOT in_stock AND price < 100 OR on_sale = true; +-- Evaluated as: ((NOT in_stock) AND (price < 100)) OR (on_sale = true) + +-- With parentheses (explicit) +SELECT * FROM products +WHERE NOT (in_stock AND price < 100) OR on_sale = true; +-- Evaluated as: (NOT (in_stock AND price < 100)) OR (on_sale = true) +``` + +**De Morgan's Laws:** +```sql +-- NOT (A AND B) = (NOT A) OR (NOT B) +SELECT * FROM users +WHERE NOT (is_active = true AND is_verified = true); +-- Equivalent to: +SELECT * FROM users +WHERE is_active = false OR is_verified = false; + +-- NOT (A OR B) = (NOT A) AND (NOT B) +SELECT * FROM products +WHERE NOT (category = 'A' OR category = 'B'); +-- Equivalent to: +SELECT * FROM products +WHERE category != 'A' AND category != 'B'; +-- Or better: +SELECT * FROM products +WHERE category NOT IN ('A', 'B'); +``` + +--- + +## Cast Operators + +### Operator: `::` + +**Description:** +Provides an alternative syntax to the [CAST](functions_type_conversion.md#type-conversion-functions) function. PostgreSQL-style type casting. + +**Syntax:** +```sql +expr::TYPE +``` + +**Inputs:** +- `expr` - Expression to convert +- `TYPE` - Target data type (`DATE`, `TIMESTAMP`, `VARCHAR`, `INT`, `DOUBLE`, etc.) 
+ +**Return type:** +- `TYPE` + +**Examples:** + +**Basic Type Casting:** +```sql +-- String to DATE +SELECT hire_date::DATE FROM emp; + +-- String to INT +SELECT '123'::INT AS num; +-- Result: 123 + +-- String to DOUBLE +SELECT '123.45'::DOUBLE AS num; +-- Result: 123.45 + +-- INT to VARCHAR +SELECT 12345::VARCHAR AS str; +-- Result: '12345' +``` + +**Date and Time Casting:** +```sql +-- String to DATE +SELECT '2025-01-10'::DATE AS d; +-- Result: 2025-01-10 + +-- String to TIMESTAMP +SELECT '2025-01-10 14:30:00'::TIMESTAMP AS ts; +-- Result: 2025-01-10 14:30:00 + +-- TIMESTAMP to DATE +SELECT CURRENT_TIMESTAMP::DATE AS today; +-- Result: 2025-10-27 + +-- Date string with explicit cast +SELECT order_date::DATE FROM orders; +``` + +**Numeric Casting:** +```sql +-- INT to DOUBLE +SELECT 100::DOUBLE AS d; +-- Result: 100.0 + +-- DOUBLE to INT (truncates) +SELECT 123.99::INT AS i; +-- Result: 123 + +-- String to DECIMAL +SELECT '123.45'::DECIMAL(10, 2) AS dec; +-- Result: 123.45 +``` + +**Boolean Casting:** +```sql +-- String to BOOLEAN +SELECT 'true'::BOOLEAN AS b; +-- Result: true + +SELECT 'false'::BOOLEAN AS b; +-- Result: false + +-- INT to BOOLEAN +SELECT 1::BOOLEAN AS b; +-- Result: true + +SELECT 0::BOOLEAN AS b; +-- Result: false +``` + +**In WHERE Clause:** +```sql +-- Cast for comparison +SELECT * FROM orders +WHERE order_date::DATE >= '2025-01-01'::DATE; + +-- Cast string to number +SELECT * FROM products +WHERE price_str::DOUBLE > 100; + +-- Cast to timestamp +SELECT * FROM events +WHERE event_time::TIMESTAMP >= '2025-01-10 00:00:00'::TIMESTAMP; +``` + +**In Calculations:** +```sql +-- Force float division +SELECT total::DOUBLE / count::DOUBLE AS average +FROM statistics; + +-- Cast for arithmetic +SELECT (price_str::DOUBLE * quantity_str::INT) AS total +FROM order_items; +``` + +**Chained Casting:** +```sql +-- Multiple casts +SELECT hire_date::VARCHAR::DATE FROM emp; +-- First to VARCHAR, then to DATE + +-- Cast then manipulate +SELECT (salary::VARCHAR || ' USD') AS formatted_salary +FROM employees; +``` + +**Comparison with CAST Function:** +```sql +-- Using :: operator (PostgreSQL style) +SELECT hire_date::DATE FROM emp; + +-- Using CAST function (standard SQL) +SELECT CAST(hire_date AS DATE) FROM emp; + +-- Both are equivalent, choose based on preference +-- :: is shorter and more readable +-- CAST is more standard and portable +``` + +**Complex Examples:** +```sql +-- Cast in JOIN condition +SELECT o.*, p.* +FROM orders o +JOIN products p ON o.product_id::VARCHAR = p.product_code; + +-- Cast in GROUP BY +SELECT + order_date::DATE AS order_day, + COUNT(*) AS order_count +FROM orders +GROUP BY order_date::DATE; + +-- Cast in CASE expression +SELECT + product_id, + CASE + WHEN stock_str::INT > 100 THEN 'High' + WHEN stock_str::INT > 50 THEN 'Medium' + ELSE 'Low' + END AS stock_level +FROM inventory; +``` + +**Error Handling:** +```sql +-- Invalid cast throws error +SELECT 'not-a-number'::INT; +-- ERROR: Cannot cast 'not-a-number' to INT + +-- Use TRY_CAST for safe casting +SELECT TRY_CAST('not-a-number' AS INT); +-- Result: NULL + +-- Note: :: operator doesn't have a "try" version +-- Use CAST/TRY_CAST for error handling +``` + +**NULL Handling:** +```sql +-- NULL cast returns NULL +SELECT NULL::INT AS result; +-- Result: NULL + +-- Cast NULL column +SELECT price::DOUBLE FROM products; +-- NULL prices remain NULL +``` + +**Best Practices:** +```sql +-- Good: Use :: for readability +SELECT created_at::DATE FROM users; + +-- Good: Use CAST for compatibility +SELECT 
CAST(created_at AS DATE) FROM users; + +-- Good: Cast both sides of comparison +SELECT * FROM orders +WHERE order_date::DATE = '2025-01-10'::DATE; + +-- Avoid: Implicit type conversion (may cause issues) +SELECT * FROM orders WHERE order_date = '2025-01-10'; +``` + +--- + +### Summary: Cast Operators + +| Syntax | Example | Notes | +|--------------------------|--------------------------|-----------------------------| +| `::TYPE` | `'123'::INT` | PostgreSQL style, shorter | +| `CAST(expr AS TYPE)` | `CAST('123' AS INT)` | Standard SQL, more portable | +| `CONVERT(expr, TYPE)` | `CONVERT('123', INT)` | MySQL style (if supported) | +| `TRY_CAST(expr AS TYPE)` | `TRY_CAST('abc' AS INT)` | Returns NULL on error | + +**When to use `::`:** +- PostgreSQL-compatible systems +- Quick, readable casts +- When you're confident the cast will succeed + +**When to use `CAST`:** +- Maximum SQL portability +- When writing cross-database queries +- Corporate/enterprise environments + +**When to use `TRY_CAST`:** +- Uncertain data quality +- User input +- ETL/data import operations +- When you want NULL instead of errors + +[Back to index](README.md) diff --git a/documentation/request_structure.md b/documentation/sql/request_structure.md similarity index 92% rename from documentation/request_structure.md rename to documentation/sql/request_structure.md index f942df23..44c01070 100644 --- a/documentation/request_structure.md +++ b/documentation/sql/request_structure.md @@ -1,8 +1,8 @@ -[Back to index](./README.md) +[Back to index](README.md) # Query Structure -**Navigation:** [Operators](./operators.md) · [Functions — Aggregate](./functions_aggregate.md) · [Keywords](./keywords.md) +**Navigation:** [Operators](operators.md) · [Functions — Aggregate](functions_aggregate.md) · [Keywords](keywords.md) This page documents the SQL clauses supported by the engine and how they map to Elasticsearch. @@ -109,4 +109,4 @@ Limit and paging. For pure aggregations, `size` is typically set to 0 and `limit SELECT * FROM emp ORDER BY hire_date DESC LIMIT 10 OFFSET 20; ``` -[Back to index](./README.md) +[Back to index](README.md) diff --git a/documentation/type_conversion.md b/documentation/sql/type_conversion.md similarity index 96% rename from documentation/type_conversion.md rename to documentation/sql/type_conversion.md index ead0e61f..53af6718 100644 --- a/documentation/type_conversion.md +++ b/documentation/sql/type_conversion.md @@ -1,4 +1,4 @@ -[Back to index](./README.md) +[Back to index](README.md) # Type Conversion Functions and Operators @@ -69,4 +69,4 @@ SELECT '2025-09-11'::DATE AS d, '125'::BIGINT AS b; - `::` is syntactic sugar, easier to read in queries. - Type inference relies on `baseType`, and explicit `CAST`/`TRY_CAST`/`::` updates the type context for following functions. 
-[Back to index](./README.md) +[Back to index](README.md) diff --git a/es6/jest/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi b/es6/jest/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi new file mode 100644 index 00000000..74e4f7de --- /dev/null +++ b/es6/jest/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi @@ -0,0 +1 @@ +app.softnetwork.elastic.client.spi.JestClientSpi \ No newline at end of file diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestAliasApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestAliasApi.scala new file mode 100644 index 00000000..6bb58291 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestAliasApi.scala @@ -0,0 +1,118 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.{AliasApi, IndicesApi} +import app.softnetwork.elastic.client.result.ElasticResult +import io.searchbox.client.JestResult +import io.searchbox.indices.aliases.{AddAliasMapping, GetAliases, ModifyAliases, RemoveAliasMapping} + +import scala.jdk.CollectionConverters._ + +/** Alias management API for Jest (Elasticsearch HTTP Client). + * @see + * [[AliasApi]] for generic API documentation + */ +trait JestAliasApi extends AliasApi with JestClientHelpers { + _: IndicesApi with JestClientCompanion => + + /** Add an alias to an index. + * @see + * [[AliasApi.addAlias]] + */ + private[client] def executeAddAlias(index: String, alias: String): ElasticResult[Boolean] = + executeJestBooleanAction( + operation = "addAlias", + index = Some(s"$index -> $alias"), + retryable = false // Aliases operations can not be retried + ) { + new ModifyAliases.Builder( + new AddAliasMapping.Builder(index, alias).build() + ).build() + } + + /** Remove an alias from an index. + * @see + * [[AliasApi.removeAlias]] + */ + private[client] def executeRemoveAlias(index: String, alias: String): ElasticResult[Boolean] = + executeJestBooleanAction( + operation = "removeAlias", + index = Some(s"$index -> $alias"), + retryable = false + ) { + new ModifyAliases.Builder( + new RemoveAliasMapping.Builder(index, alias).build() + ).build() + } + + /** Check if an alias exists. + * @see + * [[AliasApi.aliasExists]] + */ + override private[client] def executeAliasExists(alias: String): ElasticResult[Boolean] = + executeJestAction( + operation = "aliasExists", + index = Some(alias), + retryable = true + ) { + new GetAliases.Builder() + .addAlias(alias) + .build() + }(result => !result.getJsonObject.getAsJsonObject.entrySet().isEmpty) + + /** Get aliases for a given index. 
+ * @see + * [[AliasApi.getAliases]] + */ + private[client] def executeGetAliases(index: String): ElasticResult[String] = + executeJestAction[JestResult, String]( + operation = "getAliases", + index = Some(index), + retryable = true + ) { + new GetAliases.Builder() + .addIndex(index) + .build() + } { result => + result.getJsonString + } + + /** Swap an alias from an old index to a new index atomically. + * @see + * [[AliasApi.swapAlias]] + */ + override private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): ElasticResult[Boolean] = + executeJestBooleanAction( + operation = "swapAlias", + index = Some(s"$oldIndex -> $newIndex"), + retryable = false + ) { + new ModifyAliases.Builder( + Seq( + // ✅ Remove from old index + new RemoveAliasMapping.Builder(oldIndex, alias).build(), + // ✅ Add to new index + new AddAliasMapping.Builder(newIndex, alias).build() + ).asJava + ).build() + } +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestBulkApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestBulkApi.scala new file mode 100644 index 00000000..9399ecec --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestBulkApi.scala @@ -0,0 +1,378 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client.jest + +import akka.NotUsed +import akka.actor.ActorSystem +import akka.stream.scaladsl.Flow +import app.softnetwork.elastic.client.{BulkApi, IndexApi, RefreshApi, SettingsApi} +import app.softnetwork.elastic.client.bulk.{ + BulkAction, + BulkElasticAction, + BulkError, + BulkItem, + BulkOptions, + FailedDocument, + SuccessfulDocument +} +import io.searchbox.action.BulkableAction +import io.searchbox.core.{Bulk, Delete, Index, Update} +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods + +import scala.concurrent.{ExecutionContext, Future} +import scala.jdk.CollectionConverters._ +import scala.language.implicitConversions +import scala.util.{Failure, Success, Try} + +trait JestBulkApi extends BulkApi { + _: RefreshApi with SettingsApi with IndexApi with JestClientCompanion => + + // ======================================================================== + // TYPE ALIASES FOR JEST + // ======================================================================== + + override type BulkActionType = BulkableAction[_] + override type BulkResultType = io.searchbox.core.BulkResult + + // ======================================================================== + // BULK ACTION CONVERSION + // ======================================================================== + + override implicit def toBulkElasticAction(a: BulkActionType): BulkElasticAction = { + new BulkElasticAction { + override def index: String = { + a match { + case idx: Index => idx.getIndex + case upd: Update => upd.getIndex + case del: Delete => del.getIndex + case _ => "" + } + } + } + } + + // ======================================================================== + // BULK FLOW IMPLEMENTATION + // ======================================================================== + + override private[client] def bulkFlow(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Flow[Seq[BulkActionType], BulkResultType, NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + + Flow[Seq[BulkActionType]] + .mapAsync(1) { actions => + Future { + val bulkBuilder = new Bulk.Builder() + .defaultIndex(bulkOptions.defaultIndex) + .defaultType(bulkOptions.defaultType) + + actions.foreach { + case idx: Index => + bulkBuilder.addAction(idx) + case upd: Update => + bulkBuilder.addAction(upd) + case del: Delete => + bulkBuilder.addAction(del) + case other => + logger.warn(s"Unsupported action type: ${other.getClass.getName}") + } + + val bulk = bulkBuilder.build() + + Try(apply().execute(bulk)) match { + case Success(result) => + if (!result.isSucceeded) { + logger.warn( + s"Bulk operation completed with errors: ${result.getErrorMessage}" + ) + } else { + logger.info(s"Bulk operation succeeded with ${actions.size} actions.") + } + result + + case Failure(ex) => + logger.error(s"Bulk execution failed: ${ex.getMessage}", ex) + throw ex + } + } + } + } + + // ======================================================================== + // EXTRACT BULK RESULTS + // ======================================================================== + + override private[client] def extractBulkResults( + result: BulkResultType, + originalBatch: Seq[BulkItem] + ): Seq[Either[FailedDocument, SuccessfulDocument]] = { + + // no results at all + if ( + originalBatch.nonEmpty && + (result == null || (result.getItems == null && result.getFailedItems == null)) + ) { + logger.error("Bulk result is null or has no items") + return originalBatch.map { item => + Left( + FailedDocument( + id = 
item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = BulkError( + message = "Null bulk result", + `type` = "internal_error", + status = 500 + ), + retryable = false + ) + ) + } + } + + // process failed items + val failedItems = result.getFailedItems.asScala.toSeq.map { item => + val itemIndex = Option(item.index).getOrElse("unknown_index") + val itemId = Option(item.id).getOrElse("unknown_id") + + val bulkError = parseJestError(item.error, item.status) + + Left( + FailedDocument( + id = itemId, + index = itemIndex, + document = originalBatch + .find(o => o.id.contains(itemId) && o.index == itemIndex) + .map(_.document) + .getOrElse("{}"), + error = bulkError, + retryable = bulkError.isRetryable + ) + ) + } + + // process successful items + val items = + result.getItems.asScala.toSeq.filter(i => i.error == null || i.error.trim.isEmpty).map { + item => + //val itemIndex = Option(item.index).getOrElse("unknown_index") + //val itemId = Option(item.id).getOrElse("unknown_id") + Right(SuccessfulDocument(id = item.id, index = item.index)) + } + + val results = failedItems ++ items + + // if no individual results but overall failure, mark all as failed + if (results.isEmpty && originalBatch.nonEmpty && !result.isSucceeded) { + logger.error(s"Bulk operation completed with errors: ${result.getErrorMessage}") + implicit val formats: DefaultFormats = org.json4s.DefaultFormats + val errorString = result.getJsonString + val bulkError = + Try { + + val json = JsonMethods.parse(errorString, useBigDecimalForDouble = false) + + val errorType = (json \ "error" \ "type").extractOpt[String].getOrElse("unknown_error") + val reason = (json \ "error" \ "reason").extractOpt[String].getOrElse(errorString) + val status = (json \ "status").extractOpt[Int].getOrElse(500) + + // Extract caused_by if present + val causedBy = + (json \ "error" \ "root_cause").extract[Seq[Map[String, Any]]].headOption.map { + caused => + BulkError( + message = caused.getOrElse("reason", "Unknown cause").toString, + `type` = caused.getOrElse("type", "unknown").toString, + status = status + ) + } + + BulkError( + message = reason, + `type` = errorType, + status = status, + causedBy = causedBy + ) + }.getOrElse { + BulkError( + message = errorString, + `type` = "parse_error", + status = 500 + ) + } + return originalBatch.map { item => + Left( + FailedDocument( + id = item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = bulkError, + retryable = false + ) + ) + } + } + + results + } + + // ======================================================================== + // BULK ITEM TO ACTION CONVERSION + // ======================================================================== + + override private[client] implicit def toBulkAction(bulkItem: BulkItem): BulkActionType = { + bulkItem.action match { + case BulkAction.INDEX => + val indexBuilder = new Index.Builder(bulkItem.document) + .index(bulkItem.index) + + bulkItem.id.foreach(id => indexBuilder.id(id)) + bulkItem.parent.foreach(parent => indexBuilder.setParameter("parent", parent)) + + indexBuilder.build() + + case BulkAction.UPDATE => + // Use docAsUpsert helper + val upsertDoc = docAsUpsert(bulkItem.document) + + val updateBuilder = new Update.Builder(upsertDoc) + .index(bulkItem.index) + .id(bulkItem.id.getOrElse("")) + + bulkItem.parent.foreach(parent => updateBuilder.setParameter("parent", parent)) + + updateBuilder.build() + + case BulkAction.DELETE => + val deleteBuilder = new 
Delete.Builder(bulkItem.id.getOrElse("")) + .index(bulkItem.index) + + bulkItem.parent.foreach(parent => deleteBuilder.setParameter("parent", parent)) + + deleteBuilder.build() + } + } + + // ======================================================================== + // ACTION TO BULK ITEM CONVERSION + // ======================================================================== + + override private[client] def actionToBulkItem(action: BulkActionType): BulkItem = { + action match { + case idx: Index => + BulkItem( + index = idx.getIndex, + action = BulkAction.INDEX, + document = Option(idx.getData(null)).getOrElse("{}"), + id = Option(idx.getId), + parent = Option(idx.getParameter("parent")).map(_.toString) + ) + + case upd: Update => + // Extract original document from update payload + val updatePayload = Option(upd.getData(null)).getOrElse("{}") + val document = extractDocFromUpdate(updatePayload) + + BulkItem( + index = upd.getIndex, + action = BulkAction.UPDATE, + document = document, + id = Option(upd.getId), + parent = Option(upd.getParameter("parent")).map(_.toString) + ) + + case del: Delete => + BulkItem( + index = del.getIndex, + action = BulkAction.DELETE, + document = "{}", + id = Option(del.getId), + parent = Option(del.getParameter("parent")).map(_.toString) + ) + + case _ => + BulkItem( + index = "", + action = BulkAction.INDEX, + document = "{}", + id = None, + parent = None + ) + } + } + + // ======================================================================== + // HELPER METHODS + // ======================================================================== + + /** Parse error from Jest bulk item error string + */ + private def parseJestError(errorString: String, status: Int): BulkError = { + implicit val formats: DefaultFormats = org.json4s.DefaultFormats + + Try { + val json = JsonMethods.parse(errorString, useBigDecimalForDouble = false) + + val errorType = (json \ "type").extractOpt[String].getOrElse("unknown_error") + val reason = (json \ "reason").extractOpt[String].getOrElse(errorString) + + // Extract caused_by if present + val causedBy = (json \ "caused_by").extractOpt[Map[String, Any]].map { caused => + BulkError( + message = caused.getOrElse("reason", "Unknown cause").toString, + `type` = caused.getOrElse("type", "unknown").toString, + status = status + ) + } + + BulkError( + message = reason, + `type` = errorType, + status = status, + causedBy = causedBy + ) + }.getOrElse { + BulkError( + message = errorString, + `type` = "parse_error", + status = status + ) + } + } + + /** Extract document from update payload (removes doc_as_upsert wrapper) + */ + private def extractDocFromUpdate(updatePayload: String): String = { + implicit val formats: DefaultFormats = org.json4s.DefaultFormats + + Try { + val json = JsonMethods.parse(updatePayload, useBigDecimalForDouble = false) + (json \ "doc").extractOpt[Map[String, Any]] match { + case Some(doc) => org.json4s.jackson.Serialization.write(doc) + case None => updatePayload + } + }.getOrElse(updatePayload) + } + + /** Helper to create doc_as_upsert payload + */ + private def docAsUpsert(doc: String): String = s"""{"doc":$doc,"doc_as_upsert":true}""" + +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientApi.scala index f913170c..f6cd2e76 100644 --- a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientApi.scala +++ 
b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientApi.scala @@ -16,32 +16,15 @@ package app.softnetwork.elastic.client.jest -import akka.NotUsed -import akka.actor.ActorSystem -import akka.stream.scaladsl.Flow import app.softnetwork.elastic.client._ -import app.softnetwork.elastic.sql -import app.softnetwork.elastic.sql.query.{SQLQuery, SQLSearchRequest} +import app.softnetwork.elastic.sql.query.SQLQuery import app.softnetwork.elastic.sql.bridge._ -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.serialization._ -import com.google.gson.JsonParser import io.searchbox.action.BulkableAction import io.searchbox.core._ -import io.searchbox.core.search.aggregation.RootAggregation -import io.searchbox.indices._ -import io.searchbox.indices.aliases.{AddAliasMapping, ModifyAliases, RemoveAliasMapping} -import io.searchbox.indices.mapping.{GetMapping, PutMapping} -import io.searchbox.indices.reindex.Reindex -import io.searchbox.indices.settings.{GetSettings, UpdateSettings} -import io.searchbox.params.Parameters import org.json4s.Formats -//import scala.jdk.CollectionConverters._ -import scala.collection.JavaConverters._ -import scala.concurrent.{ExecutionContext, Future, Promise} +import scala.jdk.CollectionConverters._ import scala.language.implicitConversions -import scala.util.{Failure, Success, Try} /** Created by smanciot on 20/05/2021. */ @@ -54,779 +37,20 @@ trait JestClientApi with JestRefreshApi with JestFlushApi with JestCountApi - with JestSingleValueAggregateApi with JestIndexApi with JestUpdateApi with JestDeleteApi with JestGetApi with JestSearchApi + with JestScrollApi with JestBulkApi + with JestVersionApi + with JestClientCompanion -trait JestIndicesApi extends IndicesApi with JestRefreshApi with JestClientCompanion { - override def createIndex(index: String, settings: String = defaultSettings): Boolean = - tryOrElse( - apply() - .execute( - new CreateIndex.Builder(index).settings(settings).build() - ) - .isSucceeded, - false - )(logger) - - override def deleteIndex(index: String): Boolean = - tryOrElse( - apply() - .execute( - new DeleteIndex.Builder(index).build() - ) - .isSucceeded, - false - )(logger) - - override def closeIndex(index: String): Boolean = - tryOrElse( - apply() - .execute( - new CloseIndex.Builder(index).build() - ) - .isSucceeded, - false - )(logger) - - override def openIndex(index: String): Boolean = - tryOrElse( - apply() - .execute( - new OpenIndex.Builder(index).build() - ) - .isSucceeded, - false - )(logger) - - /** Reindex from source index to target index. - * - * @param sourceIndex - * - the name of the source index - * @param targetIndex - * - the name of the target index - * @param refresh - * - true to refresh the target index after reindexing, false otherwise - * @return - * true if the reindexing was successful, false otherwise - */ - override def reindex(sourceIndex: String, targetIndex: String, refresh: Boolean): Boolean = { - tryOrElse( - { - apply() - .execute( - new Reindex.Builder(s"""{"index": "$sourceIndex"}""", s"""{"index": "$targetIndex"}""") - .build() - ) - .isSucceeded && { - if (refresh) { - this.refresh(targetIndex) - } else { - true - } - } - }, - false - )(logger) - } - - /** Check if an index exists. 
- * - * @param index - * - the name of the index to check - * @return - * true if the index exists, false otherwise - */ - override def indexExists(index: String): Boolean = - tryOrElse( - apply() - .execute( - new IndicesExists.Builder(index).build() - ) - .isSucceeded, - false - )(logger) -} - -trait JestAliasApi extends AliasApi with JestClientCompanion { - override def addAlias(index: String, alias: String): Boolean = { - tryOrElse( - apply() - .execute( - new ModifyAliases.Builder( - new AddAliasMapping.Builder(index, alias).build() - ).build() - ) - .isSucceeded, - false - )(logger) - } - - override def removeAlias(index: String, alias: String): Boolean = { - tryOrElse( - apply() - .execute( - new ModifyAliases.Builder( - new RemoveAliasMapping.Builder(index, alias).build() - ).build() - ) - .isSucceeded, - false - )(logger) - } -} - -trait JestSettingsApi extends SettingsApi with JestClientCompanion { - _: IndicesApi => - override def updateSettings(index: String, settings: String = defaultSettings): Boolean = - closeIndex(index) && - tryOrElse( - apply() - .execute( - new UpdateSettings.Builder(settings).addIndex(index).build() - ) - .isSucceeded, - false - )(logger) && - openIndex(index) - - override def loadSettings(index: String): String = - tryOrElse( - { - new JsonParser() - .parse( - apply() - .execute( - new GetSettings.Builder().addIndex(index).build() - ) - .getJsonString - ) - .getAsJsonObject - .get(index) - .getAsJsonObject - .get("settings") - .getAsJsonObject - .get("index") - .getAsJsonObject - .toString - }, - "{}" - )(logger) -} - -trait JestMappingApi extends MappingApi with JestClientCompanion { - _: IndicesApi => - override def setMapping(index: String, mapping: String): Boolean = - tryOrElse( - apply() - .execute( - new PutMapping.Builder(index, "_doc", mapping).build() - ) - .isSucceeded, - false - )(logger) - - override def getMapping(index: String): String = - tryOrElse( - { - new JsonParser() - .parse( - apply() - .execute( - new GetMapping.Builder().addIndex(index).addType("_doc").build() - ) - .getJsonString - ) - .getAsJsonObject - .get(index) - .getAsJsonObject - .get("mappings") - .getAsJsonObject - .get("_doc") - .getAsJsonObject - .toString - }, - s""""{"properties": {}}""" // empty mapping - )(logger) - - /** Get the mapping properties of an index. 
- * - * @param index - * - the name of the index to get the mapping properties for - * @return - * the mapping properties of the index as a JSON string - */ - override def getMappingProperties(index: String): String = { - tryOrElse( - getMapping(index), - "{\"properties\": {}}" - )(logger) - } -} - -trait JestRefreshApi extends RefreshApi with JestClientCompanion { - override def refresh(index: String): Boolean = - tryOrElse( - apply() - .execute( - new Refresh.Builder().addIndex(index).build() - ) - .isSucceeded, - false - )(logger) -} - -trait JestFlushApi extends FlushApi with JestClientCompanion { - override def flush(index: String, force: Boolean = true, wait: Boolean = true): Boolean = - tryOrElse( - apply() - .execute( - new Flush.Builder().addIndex(index).force(force).waitIfOngoing(wait).build() - ) - .isSucceeded, - false - )(logger) -} - -trait JestCountApi extends CountApi with JestClientCompanion { - override def countAsync( - jsonQuery: JSONQuery - )(implicit ec: ExecutionContext): Future[Option[Double]] = { - import JestClientResultHandler._ - import jsonQuery._ - val count = new Count.Builder().query(query) - for (indice <- indices) count.addIndex(indice) - for (t <- types) count.addType(t) - val promise = Promise[Option[Double]]() - apply().executeAsyncPromise(count.build()) onComplete { - case Success(result) => - if (!result.isSucceeded) - logger.error(result.getErrorMessage) - promise.success(Option(result.getCount)) - case Failure(f) => - logger.error(f.getMessage, f) - promise.failure(f) - } - promise.future - } - - override def count(jsonQuery: JSONQuery): Option[Double] = { - import jsonQuery._ - val count = new Count.Builder().query(query) - for (indice <- indices) count.addIndex(indice) - for (t <- types) count.addType(t) - Try { - apply().execute(count.build()) - } match { - case Success(result) => - if (!result.isSucceeded) - logger.error(result.getErrorMessage) - Option(result.getCount) - case Failure(f) => - logger.error(f.getMessage, f) - None - } - } -} - -trait JestSingleValueAggregateApi extends SingleValueAggregateApi with JestCountApi { - override def aggregate( - sqlQuery: SQLQuery - )(implicit ec: ExecutionContext): Future[Seq[SingleValueAggregateResult]] = { - val aggregations: Seq[ElasticAggregation] = sqlQuery - val futures = for (aggregation <- aggregations) yield { - val promise: Promise[SingleValueAggregateResult] = Promise() - val field = aggregation.field - val sourceField = aggregation.sourceField - val aggType = aggregation.aggType - val aggName = aggregation.aggName - val query = aggregation.query - val sources = aggregation.sources - sourceField match { - case "_id" if aggType.sql == "count" => - countAsync( - JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ) - ).onComplete { - case Success(result) => - promise.success( - SingleValueAggregateResult( - field, - aggType, - result.map(r => NumericValue(r.doubleValue())).getOrElse(EmptyValue), - None - ) - ) - case Failure(f) => - logger.error(f.getMessage, f.fillInStackTrace()) - promise.success( - SingleValueAggregateResult(field, aggType, EmptyValue, Some(f.getMessage)) - ) - } - promise.future - case _ => - import JestClientApi._ - import JestClientResultHandler._ - apply() - .executeAsyncPromise( - JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ).search - ) - .onComplete { - case Success(result) => - val agg = aggName.split("\\.").last - - val itAgg = 
aggName.split("\\.").iterator - - var root = - if (aggregation.nested) - result.getAggregations.getAggregation(itAgg.next(), classOf[RootAggregation]) - else - result.getAggregations - - if (aggregation.filtered) { - root = root.getAggregation(itAgg.next(), classOf[RootAggregation]) - } - - promise.success( - SingleValueAggregateResult( - field, - aggType, - aggType match { - case sql.function.aggregate.COUNT => - if (aggregation.distinct) - NumericValue( - root.getCardinalityAggregation(agg).getCardinality.doubleValue() - ) - else { - NumericValue( - root.getValueCountAggregation(agg).getValueCount.doubleValue() - ) - } - case sql.function.aggregate.SUM => - NumericValue(root.getSumAggregation(agg).getSum) - case sql.function.aggregate.AVG => - NumericValue(root.getAvgAggregation(agg).getAvg) - case sql.function.aggregate.MIN => - NumericValue(root.getMinAggregation(agg).getMin) - case sql.function.aggregate.MAX => - NumericValue(root.getMaxAggregation(agg).getMax) - case _ => EmptyValue - }, - None - ) - ) - - case Failure(f) => - logger.error(f.getMessage, f.fillInStackTrace()) - promise.success( - SingleValueAggregateResult(field, aggType, EmptyValue, Some(f.getMessage)) - ) - } - - promise.future - } - } - Future.sequence(futures) - } -} - -trait JestIndexApi extends IndexApi with JestClientCompanion { - _: RefreshApi => - override def index(index: String, id: String, source: String): Boolean = { - Try( - apply().execute( - new Index.Builder(source).index(index).`type`("_doc").id(id).build() - ) - ) match { - case Success(s) => - if (!s.isSucceeded) - logger.error(s.getErrorMessage) - s.isSucceeded && this.refresh(index) - case Failure(f) => - logger.error(f.getMessage, f) - false - } - } - - override def indexAsync(index: String, id: String, source: String)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - import JestClientResultHandler._ - val promise: Promise[Boolean] = Promise() - apply().executeAsyncPromise( - new Index.Builder(source).index(index).`type`("_doc").id(id).build() - ) onComplete { - case Success(s) => promise.success(s.isSucceeded && this.refresh(index)) - case Failure(f) => - logger.error(f.getMessage, f) - promise.failure(f) - } - promise.future - } - -} - -trait JestUpdateApi extends UpdateApi with JestClientCompanion { - _: RefreshApi => - override def update( - index: String, - id: String, - source: String, - upsert: Boolean - ): Boolean = { - Try( - apply().execute( - new Update.Builder( - if (upsert) - docAsUpsert(source) - else - source - ).index(index).`type`("_doc").id(id).build() - ) - ) match { - case Success(s) => - if (!s.isSucceeded) - logger.error(s.getErrorMessage) - s.isSucceeded && this.refresh(index) - case Failure(f) => - logger.error(f.getMessage, f) - false - } - } - - override def updateAsync( - index: String, - id: String, - source: String, - upsert: Boolean - )(implicit ec: ExecutionContext): Future[Boolean] = { - import JestClientResultHandler._ - val promise: Promise[Boolean] = Promise() - apply().executeAsyncPromise( - new Update.Builder( - if (upsert) - docAsUpsert(source) - else - source - ).index(index).`type`("_doc").id(id).build() - ) onComplete { - case Success(s) => - if (!s.isSucceeded) - logger.error(s.getErrorMessage) - promise.success(s.isSucceeded && this.refresh(index)) - case Failure(f) => - logger.error(f.getMessage, f) - promise.failure(f) - } - promise.future - } - -} - -trait JestDeleteApi extends DeleteApi with JestClientCompanion { - _: RefreshApi => - override def delete(uuid: String, index: String): Boolean 
= { - Try( - apply() - .execute( - new Delete.Builder(uuid).index(index).`type`("_doc").build() - ) - ) match { - case Success(result) => - if (!result.isSucceeded) - logger.error(result.getErrorMessage) - result.isSucceeded && this.refresh(index) - case Failure(f) => - logger.error(f.getMessage, f) - false - } - } - - override def deleteAsync(uuid: String, index: String)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - import JestClientResultHandler._ - val promise: Promise[Boolean] = Promise() - apply().executeAsyncPromise( - new Delete.Builder(uuid).index(index).`type`("_doc").build() - ) onComplete { - case Success(s) => - if (!s.isSucceeded) - logger.error(s.getErrorMessage) - promise.success(s.isSucceeded && this.refresh(index)) - case Failure(f) => - logger.error(f.getMessage, f) - promise.failure(f) - } - promise.future - } - -} - -trait JestGetApi extends GetApi with JestClientCompanion { - - // GetApi - override def get[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], formats: Formats): Option[U] = { - val result = apply().execute( - new Get.Builder( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ), - id - ).build() - ) - if (result.isSucceeded) { - Some(serialization.read[U](result.getSourceAsString)) - } else { - logger.error(result.getErrorMessage) - None - } - } - - override def getAsync[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[Option[U]] = { - import JestClientResultHandler._ - val promise: Promise[Option[U]] = Promise() - apply().executeAsyncPromise( - new Get.Builder( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ), - id - ).build() - ) onComplete { - case Success(result) => - if (result.isSucceeded) - promise.success(Some(serialization.read[U](result.getSourceAsString))) - else { - logger.error(result.getErrorMessage) - promise.success(None) - } - case Failure(f) => - logger.error(f.getMessage, f) - promise.failure(f) - } - promise.future - } - -} - -trait JestSearchApi extends SearchApi with JestClientCompanion { - - override implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = - implicitly[ElasticSearchRequest](sqlSearch).query - - import JestClientApi._ - - override def search[U]( - jsonQuery: JSONQuery - )(implicit m: Manifest[U], formats: Formats): List[U] = { - import jsonQuery._ - val search = new Search.Builder(query) - for (indice <- indices) search.addIndex(indice) - for (t <- types) search.addType(t) - Try( - apply() - .execute(search.build()) - .getSourceAsStringList - .asScala - .map(source => serialization.read[U](source)) - .toList - ) match { - case Success(s) => s - case Failure(f) => - logger.error(f.getMessage, f) - List.empty - } - } - - override def searchAsync[U]( - sqlQuery: SQLQuery - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[List[U]] = { - val promise = Promise[List[U]]() - val search: Option[Search] = sqlQuery.jestSearch - search match { - case Some(s) => - import JestClientResultHandler._ - apply().executeAsyncPromise(s) onComplete { - case Success(searchResult) => - promise.success( - searchResult.getSourceAsStringList.asScala - .map(source => serialization.read[U](source)) - .toList - ) - case Failure(f) => - promise.failure(f) - } - case _ => promise.success(List.empty) - } 
- promise.future - } - - override def searchWithInnerHits[U, I](jsonQuery: JSONQuery, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[(U, List[I])] = { - Try(apply().execute(jsonQuery.search)).toOption match { - case Some(result) => - if (!result.isSucceeded) { - logger.error(result.getErrorMessage) - return List.empty - } - Try(result.getJsonObject ~> [U, I] innerField) match { - case Success(s) => s - case Failure(f) => - logger.error(f.getMessage, f) - List.empty - } - case _ => List.empty - } - } - - override def multiSearch[U]( - jsonQueries: JSONQueries - )(implicit m: Manifest[U], formats: Formats): List[List[U]] = { - tryOrElse( - { - val multiSearchResult = - apply().execute(new MultiSearch.Builder(jsonQueries.queries.map(_.search).asJava).build()) - multiSearchResult.getResponses.asScala - .map(searchResponse => - searchResponse.searchResult.getSourceAsStringList.asScala - .map(source => serialization.read[U](source)) - .toList - ) - .toList - }, - List.empty - )(logger) - } - - override def multiSearchWithInnerHits[U, I](jsonQueries: JSONQueries, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[List[(U, List[I])]] = { - val multiSearch = new MultiSearch.Builder(jsonQueries.queries.map(_.search).asJava).build() - Try(apply().execute(multiSearch)).toOption match { - case Some(multiSearchResult) => - if (!multiSearchResult.isSucceeded) { - logger.error(multiSearchResult.getErrorMessage) - return List.empty - } - multiSearchResult.getResponses.asScala - .map(searchResponse => { - Try(searchResponse.searchResult.getJsonObject ~> [U, I] innerField) match { - case Success(s) => s - case Failure(f) => - logger.error(f.getMessage, f) - List.empty[(U, List[I])] - } - }) - .toList - case _ => List.empty - } - } - -} - -trait JestBulkApi - extends JestRefreshApi - with JestSettingsApi - with JestIndicesApi - with BulkApi - with JestClientCompanion { - override type A = BulkableAction[DocumentResult] - override type R = BulkResult - - override implicit def toBulkElasticAction(a: A): BulkElasticAction = - new BulkElasticAction { - override def index: String = a.getIndex - } - - private[this] def toBulkElasticResultItem(i: BulkResult#BulkResultItem): BulkElasticResultItem = - new BulkElasticResultItem { - override def index: String = i.index - } - - override implicit def toBulkElasticResult(r: R): BulkElasticResult = - new BulkElasticResult { - override def items: List[BulkElasticResultItem] = - r.getItems.asScala.toList.map(toBulkElasticResultItem) - } - - override def bulk(implicit - bulkOptions: BulkOptions, - system: ActorSystem - ): Flow[Seq[A], R, NotUsed] = { - import JestClientResultHandler._ - val parallelism = Math.max(1, bulkOptions.balance) - - Flow[Seq[BulkableAction[DocumentResult]]] - .named("bulk") - .mapAsyncUnordered[BulkResult](parallelism)(items => { - logger.info(s"Starting to write batch of ${items.size}...") - val init = - new Bulk.Builder().defaultIndex(bulkOptions.index).defaultType(bulkOptions.documentType) - val bulkQuery = items.foldLeft(init) { (current, query) => - current.addAction(query) - } - apply().executeAsyncPromise(bulkQuery.build()) - }) - } - - override def bulkResult: Flow[R, Set[String], NotUsed] = - Flow[BulkResult] - .named("result") - .map(result => { - val items = result.getItems - val indices = items.asScala.map(_.index).toSet - logger.info(s"Finished to write batch of ${items.size} within ${indices.mkString(",")}.") - indices - }) - - override def 
toBulkAction(bulkItem: BulkItem): A = { - val builder = bulkItem.action match { - case BulkAction.DELETE => new Delete.Builder(bulkItem.body) - case BulkAction.UPDATE => new Update.Builder(docAsUpsert(bulkItem.body)) - case _ => new Index.Builder(bulkItem.body) - } - bulkItem.id.foreach(builder.id) - builder.index(bulkItem.index) - bulkItem.parent.foreach(s => builder.setParameter(Parameters.PARENT, s)) - builder.build() - } - -} - -object JestClientApi { +object JestClientApi extends SerializationApi { implicit def requestToSearch(elasticSelect: ElasticSearchRequest): Search = { import elasticSelect._ - Console.println(query) val search = new Search.Builder(query) for (source <- sources) search.addIndex(source) search.build() @@ -843,13 +67,13 @@ object JestClientApi { } } - implicit class SearchJSONQuery(jsonQuery: JSONQuery) { - def search: Search = { - import jsonQuery._ + implicit class SearchElasticQuery(elasticQuery: ElasticQuery) { + def search: (Search, JSONQuery) = { + import elasticQuery._ val _search = new Search.Builder(query) for (indice <- indices) _search.addIndex(indice) for (t <- types) _search.addType(t) - _search.build() + (_search.build(), query) } } diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientCompanion.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientCompanion.scala index 497d1afe..bbc75feb 100644 --- a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientCompanion.scala +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientCompanion.scala @@ -16,161 +16,84 @@ package app.softnetwork.elastic.client.jest -import app.softnetwork.elastic.client.{ElasticConfig, ElasticCredentials} +import app.softnetwork.elastic.client.ElasticClientCompanion import com.sksamuel.exts.Logging -import io.searchbox.action.Action import io.searchbox.client.config.HttpClientConfig -import io.searchbox.client.{JestClient, JestClientFactory, JestResult, JestResultHandler} -import org.apache.http.HttpHost +import io.searchbox.client.{JestClient, JestClientFactory} +import io.searchbox.core.Cat -import java.io.IOException -import java.util import java.util.concurrent.TimeUnit -import scala.collection.JavaConverters._ -//import scala.jdk.CollectionConverters._ +import scala.jdk.CollectionConverters._ import scala.language.reflectiveCalls -import scala.util.{Failure, Success, Try} /** Created by smanciot on 20/05/2021. 
*/ -trait JestClientCompanion extends Logging { - - def elasticConfig: ElasticConfig - - private[this] var jestClient: Option[InnerJestClient] = None - - private[this] val factory = new JestClientFactory() - - private[this] var httpClientConfig: HttpClientConfig = _ - - private[this] class InnerJestClient(private var _jestClient: JestClient) extends JestClient { - private[this] var nbFailures: Int = 0 - - override def shutdownClient(): Unit = { - close() - } - - private def checkClient(): Unit = { - Option(_jestClient) match { - case None => - factory.setHttpClientConfig(httpClientConfig) - _jestClient = Try(factory.getObject) match { - case Success(s) => - s - case Failure(f) => - logger.error(f.getMessage, f) - throw f - } - case _ => - } - } - - override def executeAsync[J <: JestResult]( - clientRequest: Action[J], - jestResultHandler: JestResultHandler[_ >: J] - ): Unit = { - Try(checkClient()) - Option(_jestClient) match { - case Some(s) => s.executeAsync[J](clientRequest, jestResultHandler) - case _ => - close() - jestResultHandler.failed(new Exception("JestClient not initialized")) - } +trait JestClientCompanion extends ElasticClientCompanion[JestClient] with Logging { + + /** Create and configure Elasticsearch Client + */ + override protected def createClient(): JestClient = { + try { + val factory = new JestClientFactory() + factory.setHttpClientConfig(buildHttpConfig()) + factory.getObject + } catch { + case ex: Exception => + logger.error(s"Failed to create JestClient: ${ex.getMessage}", ex) + throw new IllegalStateException("Cannot create Elasticsearch client", ex) } + } - override def execute[J <: JestResult](clientRequest: Action[J]): J = { - Try(checkClient()) - Option(_jestClient) match { - case Some(j) => - Try(j.execute[J](clientRequest)) match { - case Success(s) => - nbFailures = 0 - s - case Failure(f) => - f match { - case e: IOException => - nbFailures += 1 - logger.error(e.getMessage, e) - close() - if (nbFailures < 10) { - Thread.sleep(1000 * nbFailures) - execute(clientRequest) - } else { - throw f - } - case e: IllegalStateException => - nbFailures += 1 - logger.error(e.getMessage, e) - close() - if (nbFailures < 10) { - Thread.sleep(1000 * nbFailures) - execute(clientRequest) - } else { - throw f - } - case _ => - close() - throw f - } - } - case _ => - close() - throw new Exception("JestClient not initialized") + /** Test connection to Elasticsearch cluster + * + * @return + * true if connection is successful + */ + override def testConnection(): Boolean = { + try { + val c = apply() + val result = c.execute(new Cat.NodesBuilder().build()) + if (result.isSucceeded) { + logger.info(s"Connected to Elasticsearch ${result.getJsonString}") + true + } else { + logger.error(s"Failed to connect to Elasticsearch: ${result.getErrorMessage}") + incrementFailures() + false } - } - - override def setServers(servers: util.Set[String]): Unit = { - Try(checkClient()) - Option(_jestClient).foreach(_.setServers(servers)) - } - - override def close(): Unit = { - Option(_jestClient).foreach(_.close()) - _jestClient = null + } catch { + case ex: Exception => + logger.error(s"Failed to connect to Elasticsearch: ${ex.getMessage}", ex) + incrementFailures() + false } } - private[this] def getHttpHosts(esUrl: String): Set[HttpHost] = { - esUrl - .split(",") - .map(u => { - val url = new java.net.URL(u) - new HttpHost(url.getHost, url.getPort, url.getProtocol) - }) - .toSet - } - - def apply(): JestClient = { - apply( - elasticConfig.credentials, - multithreaded = 
elasticConfig.multithreaded, - discoveryEnabled = elasticConfig.discoveryEnabled - ) - } - - def apply( - esCredentials: ElasticCredentials, - multithreaded: Boolean = true, - timeout: Int = 60000, - discoveryEnabled: Boolean = false, - discoveryFrequency: Long = 60L, - discoveryFrequencyTimeUnit: TimeUnit = TimeUnit.SECONDS - ): JestClient = { - jestClient match { - case Some(s) => s - case None => - httpClientConfig = new HttpClientConfig.Builder(esCredentials.url) - .defaultCredentials(esCredentials.username, esCredentials.password) - .preemptiveAuthTargetHosts(getHttpHosts(esCredentials.url).asJava) - .multiThreaded(multithreaded) - .discoveryEnabled(discoveryEnabled) - .discoveryFrequency(discoveryFrequency, discoveryFrequencyTimeUnit) - .connTimeout(timeout) - .readTimeout(timeout) - .build() - factory.setHttpClientConfig(httpClientConfig) - jestClient = Some(new InnerJestClient(factory.getObject)) - jestClient.get - } + private def buildHttpConfig(): HttpClientConfig = { + val httpHosts = + elasticConfig.credentials.url + .split(",") + .map(u => { + parseHttpHost(u) + }) + .toSet + + new HttpClientConfig.Builder(elasticConfig.credentials.url) + .defaultCredentials( + elasticConfig.credentials.username, + elasticConfig.credentials.password + ) + .preemptiveAuthTargetHosts(httpHosts.asJava) + .multiThreaded(true) //elasticConfig.multithreaded + .discoveryEnabled(elasticConfig.discovery.enabled) + .discoveryFrequency( + elasticConfig.discovery.frequency.getSeconds, + TimeUnit.SECONDS + ) + .connTimeout(elasticConfig.connectionTimeout.toMillis.toInt) + .readTimeout(elasticConfig.socketTimeout.toMillis.toInt) + .maxTotalConnection(100) + .defaultMaxTotalConnectionPerRoute(50) + .build() } } diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientHelpers.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientHelpers.scala new file mode 100644 index 00000000..3b85ae2f --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestClientHelpers.scala @@ -0,0 +1,397 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.ElasticClientHelpers +import app.softnetwork.elastic.client.result.{ElasticError, ElasticResult} +import io.searchbox.action.Action +import io.searchbox.client.JestResult + +import scala.concurrent.Promise +import scala.reflect.ClassTag +import scala.util.{Failure, Success, Try} + +trait JestClientHelpers extends ElasticClientHelpers { _: JestClientCompanion => + + // ======================================================================== + // GENERIC METHODS FOR EXECUTING JEST ACTIONS + // ========================================================================
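The helpers in this new file all funnel through one generic executor that reports through `ElasticResult`. As a minimal sketch of the short-circuiting composition that executor relies on (`Result`, `Ok`, `Err` and `parsePort` are illustrative stand-ins; the project's actual `ElasticResult` is richer, carrying `ElasticError` with status codes and operation context):

```scala
import scala.util.Try

// Minimal stand-in for ElasticResult (illustration only): just enough to
// show the short-circuiting flatMap the generic executor relies on.
sealed trait Result[+T] {
  def flatMap[U](f: T => Result[U]): Result[U] = this match {
    case Ok(value) => f(value)
    case e: Err    => e
  }
}
final case class Ok[+T](value: T) extends Result[T]
final case class Err(message: String) extends Result[Nothing]

object ResultExample extends App {
  // Hypothetical parsing step standing in for "execute a Jest action,
  // then transform its result".
  def parsePort(raw: String): Result[Int] =
    Try(raw.toInt).fold(_ => Err(s"not a number: $raw"), Ok(_))

  println(Ok("9200").flatMap(parsePort)) // Ok(9200)
  println(Ok("es01").flatMap(parsePort)) // Err(not a number: es01)
}
```

A failure anywhere in the chain skips every later `flatMap`, which is why the executor below can layer execution, transformation and error extraction without nested try/catch.

+ + //format:off + /** Execute a Jest action with a generic transformation of the result.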
This generic method allows + * you to execute any Jest action and transform the result into a type T using a transformation + * function. + * @tparam R + * type of the Jest result (usually JestResult or a subclass) + * @tparam T + * type of the desired final result + * @param operation + * name of the operation (for logging and error context) + * @param index + * relevant index (optional, for logging) + * @param retryable + * true if the operation can be retried in case of a transient error + * @param action + * function constructing the Jest action to be executed + * @param transformer + * function transforming the JestResult into T + * @return + * ElasticResult[T] + * + * @example + * {{{ + * executeJestAction[JestResult, Boolean]( + * operation = "createIndex", + * index = Some("my-index"), + * retryable = false + * )( + * action = new CreateIndex.Builder("my-index").build() + * )( + * transformer = result => result.isSucceeded + * ) + * }}} + */ + //format:on + private[client] def executeJestAction[R <: JestResult: ClassTag, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Action[R] + )( + transformer: R => T + ): ElasticResult[T] = { + + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + + logger.debug(s"Executing operation '$operation'$indexStr") + + // ✅ Execution with exception handling + val tryResult: Try[R] = Try { + apply().execute(action) + } + + // ✅ Conversion to ElasticResult[R] + val elasticResult: ElasticResult[R] = tryResult match { + case Success(result) => + ElasticResult.success(result) + + case Failure(ex) => + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + ElasticResult.failure( + ElasticError( + message = s"Exception during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + + // ✅ Jest success check and transformation + elasticResult.flatMap { result => + if (result.isSucceeded) { + // ✅ Success: applying the transformation + Try(transformer(result)) match { + case Success(transformed) => + logger.debug(s"Operation '$operation'$indexStr succeeded") + ElasticResult.success(transformed) + + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + ElasticResult.failure( + ElasticError( + message = s"Failed to transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = Some(result.getResponseCode), + operation = Some(operation) + ) + ) + } + } else { + // ✅ Failure: extract the error + val errorMessage = Option(result.getErrorMessage) + .filter(_.nonEmpty) + .getOrElse("Unknown error") + + val statusCode = result.getResponseCode match { + case 0 => None // No HTTP response + case code => Some(code) + } + + val error = ElasticError( + message = errorMessage, + cause = None, + statusCode = statusCode, + operation = Some(operation) + ) + + // ✅ Log according to severity + logError(operation, indexStr, error) + + ElasticResult.failure(error) + } + } + } + + /** Simplified variant for operations returning Boolean values. 
+ * + * @param operation + * name of the operation + * @param index + * target index (optional) + * @param retryable + * true if retryable + * @param action + * function constructing the Jest action + * @return + * ElasticResult[Boolean] + */ + private[client] def executeJestBooleanAction[R <: JestResult: ClassTag]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Action[R] + ): ElasticResult[Boolean] = { + executeJestAction[R, Boolean](operation, index, retryable)(action)(_.isSucceeded) + } + + //format:off + /** Variant to execute an action and extract a specific field from the JSON. + * @tparam R + * type of the Jest result + * @tparam T + * type of the final result + * @param operation + * name of the operation + * @param index + * target index (optional) + * @param retryable + * true if retryable + * @param action + * function constructing the Jest action + * @param extractor + * function extracting T from the JsonObject of the result + * @return + * ElasticResult[T] + * + * @example + * {{{ + * executeJestWithExtractor[JestResult, Int]( + * operation = "getDocCount", + * index = Some("my-index") + * )( + * action = new Count.Builder().addIndex("my-index").build() + * )( + * extractor = json => json.get("count").getAsInt + * ) + * }}} + */ + //format:on + private[client] def executeJestWithExtractor[R <: JestResult: ClassTag, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Action[R] + )( + extractor: com.google.gson.JsonObject => T + ): ElasticResult[T] = { + executeJestAction[R, T](operation, index, retryable)(action) { result => + extractor(result.getJsonObject) + } + } + + //format:off + /** Variant to execute an action and parse the complete JSON. + * @tparam R + * type of the Jest result + * @tparam T + * type of the final result (typically a case class) + * @param operation + * name of the operation + * @param index + * target index (optional) + * @param retryable + * true if retryable + * @param action + * function constructing the Jest action + * @param parser + * function parsing the JSON into T (uses json4s or another JSON library) + * @return + * ElasticResult[T] + * + * @example + * {{{ + * case class IndexStats(docsCount: Long, storeSize: String) + * + * executeJestWithParser[JestResult, IndexStats]( + * operation = "getIndexStats", + * index = Some("my-index") + * )( + * action = new Stats.Builder().addIndex("my-index").build() + * )( + * parser = json => { + * implicit val formats = DefaultFormats + * parse(json).extract[IndexStats] + * } + * ) + * }}} + */ + //format:on + private[client] def executeJestWithParser[R <: JestResult: ClassTag, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Action[R] + )( + parser: String => T + ): ElasticResult[T] = { + executeJestAction[R, T](operation, index, retryable)(action) { result => + parser(result.getJsonString) + } + } + + //format:off + /** Asynchronous variant to execute a Jest action with a generic transformation of the result.
+ * @tparam R + * type of the Jest result (usually JestResult or a subclass) + * @tparam T + * type of the desired final result + * @param operation + * name of the operation (for logging and error context) + * @param index + * relevant index (optional, for logging) + * @param retryable + * true if the operation can be retried in case of a transient error + * @param action + * function constructing the Jest action to be executed + * @param transformer + * function transforming the JestResult into T + * @return + * Future ElasticResult[T] + * + * @example + * {{{ + * executeAsyncJestAction[JestResult, Boolean]( + * operation = "createIndex", + * index = Some("my-index"), + * retryable = false + * )( + * action = new CreateIndex.Builder("my-index").build() + * )( + * transformer = result => result.isSucceeded + * ) + * }}} + */ + //format:on + private[client] def executeAsyncJestAction[R <: JestResult: ClassTag, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Action[R] + )( + transformer: R => T + )(implicit ec: scala.concurrent.ExecutionContext): scala.concurrent.Future[ElasticResult[T]] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + + logger.debug(s"Executing operation '$operation'$indexStr asynchronously") + + val promise: Promise[ElasticResult[T]] = Promise() + import JestClientResultHandler._ + apply().executeAsyncPromise(action) onComplete { + case Success(result) => + if (result.isSucceeded) { + logger.debug(s"Operation '$operation'$indexStr succeeded asynchronously") + // ✅ Success: applying the transformation + Try(transformer(result)) match { + case Success(transformed) => + promise.success(ElasticResult.success(transformed)) + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + promise.success( + ElasticResult.failure( + ElasticError( + message = s"Failed to transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = Some(result.getResponseCode), + operation = Some(operation) + ) + ) + ) + } + } else { + // ✅ Failure: extract the error + val errorMessage = Option(result.getErrorMessage) + .filter(_.nonEmpty) + .getOrElse("Unknown error") + val statusCode = result.getResponseCode match { + case 0 => None // No HTTP response + case code => Some(code) + } + val error = ElasticError( + message = errorMessage, + cause = None, + statusCode = statusCode, + operation = Some(operation) + ) + // ✅ Log according to severity + logError(operation, indexStr, error) + promise.success(ElasticResult.failure(error)) + } + case Failure(ex) => + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + val error = ElasticError( + message = s"Exception during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + promise.success(ElasticResult.failure(error)) + } + + promise.future + } + + /** Simplified asynchronous variant for operations returning Boolean values. 
+ * + * @param operation + * name of the operation + * @param index + * target index (optional) + * @param retryable + * true if retryable + * @param action + * function constructing the Jest action + * @return + * Future ElasticResult[Boolean] + */ + private[client] def executeAsyncJestBooleanAction[R <: JestResult: ClassTag]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Action[R] + )(implicit + ec: scala.concurrent.ExecutionContext + ): scala.concurrent.Future[ElasticResult[Boolean]] = { + executeAsyncJestAction[R, Boolean](operation, index, retryable)(action)(_.isSucceeded) + } +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestCountApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestCountApi.scala new file mode 100644 index 00000000..0a951160 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestCountApi.scala @@ -0,0 +1,78 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.{CountApi, ElasticQuery} +import app.softnetwork.elastic.client.result.ElasticResult +import io.searchbox.core.Count + +import scala.concurrent.{ExecutionContext, Future} + +/** Count API for Jest (Elasticsearch HTTP Client). + * @see + * [[CountApi]] for generic API documentation + */ +trait JestCountApi extends CountApi with JestClientHelpers { _: JestClientCompanion => + + /** Count documents matching a query. + * @see + * [[CountApi.count]] + */ + override private[client] def executeCount(query: ElasticQuery): ElasticResult[Option[Double]] = + executeJestAction( + operation = "count", + index = Some(query.indices.mkString(",")) + ) { + val count = new Count.Builder().query(query.query) + import query._ + for (indice <- indices) count.addIndex(indice) + for (t <- types) count.addType(t) + count.build() + } { result => + if (result.isSucceeded) { + Some(result.getCount) + } else { + None + } + }
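Before the asynchronous variant, a hedged sketch of what actually flows through `executeCount`: the `query` field is plain Elasticsearch query DSL, and `indices`/`types` become the Jest `Count` builder's targets. The index name, field and the `ElasticQuery(query, indices, types)` construction below are illustrative, not part of this patch:

```scala
object CountQuerySketch {
  // Plain Elasticsearch query DSL; index and field names are illustrative.
  val countBody: String =
    """{
      |  "query": { "term": { "status": "active" } }
      |}""".stripMargin

  // Hypothetical call, assuming an ElasticQuery(query, indices, types) shape:
  // executeCount(ElasticQuery(countBody, Seq("users"), Seq("_doc")))
  //   => ElasticResult[Option[Double]], e.g. ElasticSuccess(Some(42.0))
}
```

+ + /** Count documents matching a query asynchronously.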
+ * @see + * [[CountApi.countAsync]] + */ + override private[client] def executeCountAsync( + elasticQuery: ElasticQuery + )(implicit ec: ExecutionContext): Future[ElasticResult[Option[Double]]] = { + executeAsyncJestAction( + operation = "countAsync", + index = Some(elasticQuery.indices.mkString(",")) + ) { + import elasticQuery._ + val count = new Count.Builder().query(query) + for (indice <- indices) count.addIndex(indice) + for (t <- types) count.addType(t) + count.build() + } { result => + if (result.isSucceeded) { + Some(result.getCount) + } else { + None + } + } + } + +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestDeleteApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestDeleteApi.scala new file mode 100644 index 00000000..88b15871 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestDeleteApi.scala @@ -0,0 +1,56 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.{DeleteApi, RefreshApi} +import app.softnetwork.elastic.client.result.ElasticResult +import io.searchbox.core.Delete + +import scala.concurrent.{ExecutionContext, Future} + +/** Delete Management API for Jest (Elasticsearch HTTP Client). + * @see + * [[DeleteApi]] for generic API documentation + */ +trait JestDeleteApi extends DeleteApi with JestClientHelpers { + _: RefreshApi with JestClientCompanion => + + /** Delete an entity from the given index. + * @see + * [[DeleteApi.delete]] + */ + private[client] def executeDelete(index: String, id: String): ElasticResult[Boolean] = + executeJestBooleanAction( + operation = "delete", + index = Some(index), + retryable = true + ) { + new Delete.Builder(id).index(index).`type`("_doc").build() + } + + override private[client] def executeDeleteAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = + executeAsyncJestAction( + operation = "delete", + index = Some(index), + retryable = true + ) { + new Delete.Builder(id).index(index).`type`("_doc").build() + }(result => result.isSucceeded) + +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestFlushApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestFlushApi.scala new file mode 100644 index 00000000..2d457db2 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestFlushApi.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.FlushApi +import app.softnetwork.elastic.client.result.ElasticResult +import io.searchbox.indices.Flush + +/** Flush management API for Jest (Elasticsearch HTTP Client). + * @see + * [[FlushApi]] for generic API documentation + */ +trait JestFlushApi extends FlushApi with JestClientHelpers { _: JestClientCompanion => + + /** Flush the index to make sure all operations are written to disk. + * @see + * [[FlushApi.flush]] + */ + private[client] def executeFlush( + index: String, + force: Boolean, + wait: Boolean + ): ElasticResult[Boolean] = { + executeJestBooleanAction( + operation = "flush", + index = Some(index), + retryable = true + )( + new Flush.Builder().addIndex(index).force(force).waitIfOngoing(wait).build() + ) + } +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestGetApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestGetApi.scala new file mode 100644 index 00000000..6754a268 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestGetApi.scala @@ -0,0 +1,76 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.result.ElasticResult +import app.softnetwork.elastic.client.{result, GetApi, SerializationApi} +import io.searchbox.core.Get + +import scala.concurrent.{ExecutionContext, Future} + +/** Get API for Jest (Elasticsearch HTTP Client). + * @see + * [[GetApi]] for generic API documentation + */ +trait JestGetApi extends GetApi with JestClientHelpers { + _: JestClientCompanion with SerializationApi => + + /** Get a document by id. + * @see + * [[GetApi.get]] + */ + override private[client] def executeGet( + index: String, + id: String + ): result.ElasticResult[Option[String]] = + executeJestAction( + operation = "get", + index = Some(index), + retryable = true + ) { + new Get.Builder(index, id).build() + } { result => + if (result.isSucceeded) { + Some(result.getSourceAsString) + } else { + None + } + } + + /** Get a document by its id from the given index asynchronously. 
+ * @see + * [[GetApi.getAsync]] + */ + override private[client] def executeGetAsync( + index: String, + id: String + )(implicit ec: ExecutionContext): Future[ElasticResult[Option[String]]] = + executeAsyncJestAction( + operation = "get", + index = Some(index), + retryable = true + ) { + new Get.Builder(index, id).build() + } { result => + if (result.isSucceeded) { + Some(result.getSourceAsString) + } else { + None + } + } + +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestIndexApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestIndexApi.scala new file mode 100644 index 00000000..caa54838 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestIndexApi.scala @@ -0,0 +1,64 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.{IndexApi, RefreshApi, SerializationApi} +import app.softnetwork.elastic.client.result.ElasticResult +import io.searchbox.core.Index + +import scala.concurrent.{ExecutionContext, Future} + +/** Index Management API for Jest (Elasticsearch HTTP Client). + * @see + * [[IndexApi]] for generic API documentation + */ +trait JestIndexApi extends IndexApi with JestClientHelpers { + _: RefreshApi with JestClientCompanion with SerializationApi => + + /** Index a document in the given index. + * @see + * [[IndexApi.indexAs]] + */ + override private[client] def executeIndex( + index: String, + id: String, + source: String + ): ElasticResult[Boolean] = + executeJestBooleanAction( + operation = "index", + index = Some(index), + retryable = true + )( + new Index.Builder(source).index(index).`type`("_doc").id(id).build() + ) + + /** Index a document in the given index asynchronously. + * @see + * [[IndexApi.indexAsyncAs]] + */ + override private[client] def executeIndexAsync(index: String, id: String, source: String)(implicit + ec: ExecutionContext + ): Future[ElasticResult[Boolean]] = + executeAsyncJestBooleanAction( + operation = "indexAsync", + index = Some(index), + retryable = true + )( + new Index.Builder(source).index(index).`type`("_doc").id(id).build() + ) + +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestIndicesApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestIndicesApi.scala new file mode 100644 index 00000000..13f93df5 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestIndicesApi.scala @@ -0,0 +1,134 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.IndicesApi +import app.softnetwork.elastic.client.result.ElasticResult +import io.searchbox.client.JestResult +import io.searchbox.indices.{CloseIndex, CreateIndex, DeleteIndex, IndicesExists, OpenIndex} +import io.searchbox.indices.reindex.Reindex + +import scala.util.Try + +/** Index management API for Jest (Elasticsearch HTTP Client). + * @see + * [[IndicesApi]] for generic API documentation + */ +trait JestIndicesApi extends IndicesApi with JestRefreshApi with JestClientHelpers { + _: JestClientCompanion => + + /** Create an index with the given settings. + * @see + * [[IndicesApi.createIndex]] + */ + private[client] def executeCreateIndex( + index: String, + settings: String = defaultSettings + ): ElasticResult[Boolean] = { + executeJestBooleanAction( + operation = "createIndex", + index = Some(index), + retryable = false // Creation can not be retried + ) { + new CreateIndex.Builder(index) + .settings(settings) + .build() + } + } + + /** Delete an index. + * @see + * [[IndicesApi.deleteIndex]] + */ + private[client] def executeDeleteIndex(index: String): ElasticResult[Boolean] = { + executeJestBooleanAction( + operation = "deleteIndex", + index = Some(index), + retryable = false // Deletion can not be retried + ) { + new DeleteIndex.Builder(index).build() + } + } + + /** Close an index. + * @see + * [[IndicesApi.closeIndex]] + */ + private[client] def executeCloseIndex(index: String): ElasticResult[Boolean] = { + executeJestBooleanAction( + operation = "closeIndex", + index = Some(index), + retryable = true + ) { + new CloseIndex.Builder(index).build() + } + } + + /** Open an index. + * @see + * [[IndicesApi.openIndex]] + */ + override def executeOpenIndex(index: String): ElasticResult[Boolean] = { + executeJestBooleanAction( + operation = "openIndex", + index = Some(index), + retryable = true + ) { + new OpenIndex.Builder(index).build() + } + } + + /** Reindex documents from a source index to a target index. + * @see + * [[IndicesApi.reindex]] + */ + private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): ElasticResult[(Boolean, Option[Long])] = + executeJestAction[JestResult, (Boolean, Option[Long])]( + operation = "reindex", + index = Some(s"$sourceIndex -> $targetIndex"), + retryable = true + ) { + new Reindex.Builder( + s"""{"index": "$sourceIndex"}""", + s"""{"index": "$targetIndex"}""" + ).build() + } { result => + val success = result.isSucceeded + val docsReindexed = Try { + result.getJsonObject.get("total").getAsLong + }.toOption + (success, docsReindexed) + } + + /** Check if an index exists. 
+ * @see + * [[IndicesApi.indexExists]] + */ + private[client] def executeIndexExists(index: String): ElasticResult[Boolean] = { + executeJestBooleanAction( + operation = "indexExists", + index = Some(index), + retryable = true + ) { + new IndicesExists.Builder(index).build() + } + } +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestMappingApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestMappingApi.scala new file mode 100644 index 00000000..f9a3cec0 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestMappingApi.scala @@ -0,0 +1,100 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.{IndicesApi, MappingApi, RefreshApi, SettingsApi} +import app.softnetwork.elastic.client.result.{ + ElasticError, + ElasticFailure, + ElasticResult, + ElasticSuccess +} +import com.google.gson.JsonParser +import io.searchbox.indices.mapping.{GetMapping, PutMapping} + +import scala.util.Try + +/** Mapping management API for Jest (Elasticsearch HTTP Client). + * @see + * [[MappingApi]] for generic API documentation + */ +trait JestMappingApi extends MappingApi with JestClientHelpers { + _: SettingsApi with IndicesApi with RefreshApi with JestClientCompanion => + + /** Set the mapping for an index. + * @see + * [[MappingApi.setMapping]] + */ + private[client] def executeSetMapping(index: String, mapping: String): ElasticResult[Boolean] = { + executeJestBooleanAction( + operation = "setMapping", + index = Some(index), + retryable = false + )( + new PutMapping.Builder(index, "_doc", mapping).build() + ) + } + + private[client] def executeGetMapping(index: String): ElasticResult[String] = + executeJestAction( + operation = "getMapping", + index = Some(index), + retryable = true + )( + new GetMapping.Builder().addIndex(index).build() + ) { result => + result.getJsonString + } + + /** Get the mapping properties of an index. + * + * @param index + * - the name of the index to get the mapping properties for + * @return + * the mapping properties of the index as a JSON string + */ + override def getMappingProperties(index: String): ElasticResult[String] = { + getMapping(index).flatMap { jsonString => + // ✅ Extracting mapping from JSON + ElasticResult.attempt( + new JsonParser().parse(jsonString).getAsJsonObject + ) match { + case ElasticFailure(error) => + logger.error(s"❌ Failed to parse JSON mapping for index '$index': ${error.message}") + ElasticFailure(error.copy(operation = Some("getMapping"), index = Some(index))) + case ElasticSuccess(indexObj) => + if (Option(indexObj).isDefined && indexObj.has(index)) { + val mappingsObj = indexObj + .getAsJsonObject(index) + .getAsJsonObject("mappings") + .getAsJsonObject("_doc") + ElasticSuccess(mappingsObj.toString) + } else { + val message = s"Index '$index' not found in the loaded mapping."
+ logger.error(s"❌ $message") + ElasticFailure( + ElasticError( + message = message, + operation = Some("getMapping"), + index = Some(index) + ) + ) + } + } + } + } +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestRefreshApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestRefreshApi.scala new file mode 100644 index 00000000..238858ef --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestRefreshApi.scala @@ -0,0 +1,41 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.RefreshApi +import app.softnetwork.elastic.client.result.ElasticResult +import io.searchbox.indices.Refresh + +/** Refresh API for Jest (Elasticsearch HTTP Client). + * @see + * [[RefreshApi]] for generic API documentation + */ +trait JestRefreshApi extends RefreshApi with JestClientHelpers { _: JestClientCompanion => + + /** Refresh the index to make sure all documents are indexed and searchable. + * @see + * [[RefreshApi.refresh]] + */ + private[client] def executeRefresh(index: String): ElasticResult[Boolean] = + executeJestBooleanAction( + operation = "refresh", + index = Some(index), + retryable = true + ) { + new Refresh.Builder().addIndex(index).build() + } +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestScrollApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestScrollApi.scala new file mode 100644 index 00000000..97f6f6b1 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestScrollApi.scala @@ -0,0 +1,304 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package app.softnetwork.elastic.client.jest
+
+import akka.NotUsed
+import akka.actor.ActorSystem
+import akka.stream.scaladsl.Source
+import app.softnetwork.elastic.client.{
+  retryWithBackoff,
+  ElasticQuery,
+  ElasticResponse,
+  ScrollApi,
+  SearchApi,
+  VersionApi
+}
+import app.softnetwork.elastic.client.scroll.ScrollConfig
+import app.softnetwork.elastic.sql.query.SQLAggregation
+import com.google.gson.{JsonNull, JsonObject, JsonParser}
+import io.searchbox.core.{ClearScroll, Search, SearchScroll}
+import io.searchbox.params.Parameters
+
+import java.io.IOException
+import scala.jdk.CollectionConverters._
+import scala.concurrent.{ExecutionContext, Future}
+import scala.util.{Failure, Success, Try}
+
+trait JestScrollApi extends ScrollApi with JestClientHelpers {
+  _: VersionApi with SearchApi with JestClientCompanion =>
+
+  /** Classic scroll (works for both hits and aggregations)
+    */
+  override private[client] def scrollClassic(
+    elasticQuery: ElasticQuery,
+    fieldAliases: Map[String, String],
+    aggregations: Map[String, SQLAggregation],
+    config: ScrollConfig
+  )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = {
+    implicit val ec: ExecutionContext = system.dispatcher
+    Source
+      .unfoldAsync[Option[String], Seq[Map[String, Any]]](None) { scrollIdOpt =>
+        retryWithBackoff(config.retryConfig) {
+          Future {
+            scrollIdOpt match {
+              case None =>
+                logger.info(
+                  s"Starting classic scroll on indices: ${elasticQuery.indices.mkString(", ")}"
+                )
+
+                val searchBuilder =
+                  new Search.Builder(elasticQuery.query)
+                    .setParameter(Parameters.SIZE, config.scrollSize)
+                    .setParameter(Parameters.SCROLL, config.keepAlive)
+
+                for (indice <- elasticQuery.indices) searchBuilder.addIndex(indice)
+                for (t <- elasticQuery.types) searchBuilder.addType(t)
+
+                val result = apply().execute(searchBuilder.build())
+                if (!result.isSucceeded) {
+                  throw new IOException(s"Initial scroll failed: ${result.getErrorMessage}")
+                }
+
+                val scrollId = result.getJsonObject.get("_scroll_id").getAsString
+
+                // Extract ALL results (hits + aggregations)
+                val results =
+                  extractAllResultsFromJest(result.getJsonObject, fieldAliases, aggregations)
+
+                logger.info(
+                  s"Initial scroll returned ${results.size} results, scrollId: $scrollId"
+                )
+
+                if (results.isEmpty) {
+                  None
+                } else {
+                  Some((Some(scrollId), results))
+                }
+
+              case Some(scrollId) =>
+                logger.debug(s"Fetching next scroll batch (scrollId: $scrollId)")
+
+                val scrollBuilder = new SearchScroll.Builder(scrollId, config.keepAlive)
+
+                val result = apply().execute(scrollBuilder.build())
+                if (!result.isSucceeded) {
+                  // Throw an exception to trigger the retry logic
+                  throw new IOException(s"Scroll failed: ${result.getErrorMessage}")
+                }
+                val newScrollId = result.getJsonObject.get("_scroll_id").getAsString
+                val results =
+                  extractAllResultsFromJest(result.getJsonObject, fieldAliases, aggregations)
+
+                logger.debug(s"Scroll returned ${results.size} results")
+
+                if (results.isEmpty) {
+                  clearJestScroll(scrollId)
+                  None
+                } else {
+                  Some((Some(newScrollId), results))
+                }
+            }
+          }
+        }(system, logger).recover { case ex: Exception =>
+          logger.error(s"Scroll failed after retries: ${ex.getMessage}", ex)
+          scrollIdOpt.foreach(clearJestScroll)
+          None
+        }
+      }
+      .mapConcat(identity)
+  }
+
+  /** Search after (hits only, more efficient than classic scroll)
+    */
+  override private[client] def searchAfter(
+    elasticQuery: ElasticQuery,
+    fieldAliases: Map[String, String],
+    config: ScrollConfig,
+    hasSorts: Boolean = false
+  )(implicit system: ActorSystem): 
Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + Source + .unfoldAsync[Option[Seq[Any]], Seq[Map[String, Any]]](None) { searchAfterOpt => + retryWithBackoff(config.retryConfig) { + Future { + searchAfterOpt match { + case None => + logger.info( + s"Starting search_after on indices: ${elasticQuery.indices.mkString(", ")}" + ) + case Some(values) => + logger.debug(s"Fetching next search_after batch (after: ${values.mkString(", ")})") + } + + val queryJson = new JsonParser().parse(elasticQuery.query).getAsJsonObject + + // Check if sorts already exist in the query + if (!hasSorts && !queryJson.has("sort")) { + // No sorting defined, add _id by default + logger.warn( + "No sort fields in query for search_after, adding default _id sort. " + + "This may lead to inconsistent results if documents are updated during scroll." + ) + val sortArray = new com.google.gson.JsonArray() + val sortObj = new JsonObject() + sortObj.addProperty("_id", "asc") + sortArray.add(sortObj) + queryJson.add("sort", sortArray) + } else if (hasSorts && queryJson.has("sort")) { + // Sorts already present, check that a tie-breaker exists + val existingSorts = queryJson.getAsJsonArray("sort") + val hasIdSort = existingSorts.asScala.exists { sortElem => + sortElem.isJsonObject && sortElem.getAsJsonObject.has("_id") + } + if (!hasIdSort) { + // Add _id as tie-breaker + logger.debug("Adding _id as tie-breaker to existing sorts") + val tieBreaker = new JsonObject() + tieBreaker.addProperty("_id", "asc") + existingSorts.add(tieBreaker) + } + } + + queryJson.addProperty("size", config.scrollSize) + + // Add search_after + searchAfterOpt.foreach { searchAfter => + val searchAfterArray = new com.google.gson.JsonArray() + searchAfter.foreach { + case s: String => searchAfterArray.add(s) + case n: Number => searchAfterArray.add(n) + case b: Boolean => searchAfterArray.add(b) + case null => searchAfterArray.add(JsonNull.INSTANCE) + case other => searchAfterArray.add(other.toString) + } + queryJson.add("search_after", searchAfterArray) + } + + val searchBuilder = new Search.Builder(queryJson.toString) + for (indice <- elasticQuery.indices) searchBuilder.addIndex(indice) + for (t <- elasticQuery.types) searchBuilder.addType(t) + + val result = apply().execute(searchBuilder.build()) + + if (!result.isSucceeded) { + throw new IOException(s"Search after failed: ${result.getErrorMessage}") + } + // Extract ONLY hits (no aggregations) + val hits = extractHitsOnlyFromJest(result.getJsonObject, fieldAliases) + + if (hits.isEmpty) { + None + } else { + val hitsArray = result.getJsonObject + .getAsJsonObject("hits") + .getAsJsonArray("hits") + + val lastHit = hitsArray.get(hitsArray.size() - 1).getAsJsonObject + val nextSearchAfter = if (lastHit.has("sort")) { + Some( + lastHit + .getAsJsonArray("sort") + .asScala + .map { elem => + if (elem.isJsonPrimitive) { + val prim = elem.getAsJsonPrimitive + if (prim.isString) prim.getAsString + else if (prim.isBoolean) prim.getAsBoolean + else if (prim.isNumber) { + val num = prim.getAsNumber + if (num.toString.contains(".")) num.doubleValue() + else num.longValue() + } else prim.getAsString + } else if (elem.isJsonNull) { + null + } else { + elem.toString + } + } + .toSeq + ) + } else { + None + } + + Some((nextSearchAfter, hits)) + } + } + }(system, logger).recover { case ex: Exception => + logger.error(s"Search after failed after retries: ${ex.getMessage}", ex) + None + } + } + .mapConcat(identity) + } + + override private[client] def pitSearchAfter( + 
elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = + throw new NotImplementedError("PIT search after not implemented for Elasticsearch 6") + + /** Extract ALL results: hits + aggregations + */ + private def extractAllResultsFromJest( + jsonObject: JsonObject, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): Seq[Map[String, Any]] = { + val jsonString = jsonObject.toString + val sqlResponse = + ElasticResponse("", jsonString, fieldAliases, aggregations.map(kv => kv._1 -> kv._2)) + + parseResponse(sqlResponse) match { + case Success(rows) => rows + case Failure(ex) => + logger.error(s"Failed to parse Jest scroll response: ${ex.getMessage}", ex) + Seq.empty + } + } + + /** Extract ONLY hits (for search_after) + */ + private def extractHitsOnlyFromJest( + jsonObject: JsonObject, + fieldAliases: Map[String, String] + ): Seq[Map[String, Any]] = { + val jsonString = jsonObject.toString + val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) + + parseResponse(sqlResponse) match { + case Success(rows) => rows + case Failure(ex) => + logger.error(s"Failed to parse Jest search after response: ${ex.getMessage}", ex) + Seq.empty + } + } + + private def clearJestScroll(scrollId: String): Unit = { + Try { + logger.debug(s"Clearing Jest scroll: $scrollId") + val clearScroll = new ClearScroll.Builder() + .addScrollId(scrollId) + .build() + apply().execute(clearScroll) + }.recover { case ex: Exception => + logger.warn(s"Failed to clear Jest scroll $scrollId: ${ex.getMessage}") + } + } +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestSearchApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestSearchApi.scala new file mode 100644 index 00000000..36e0881a --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestSearchApi.scala @@ -0,0 +1,121 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.{ElasticQueries, ElasticQuery, SearchApi, SerializationApi} +import app.softnetwork.elastic.client.result.ElasticResult +import app.softnetwork.elastic.sql.bridge.ElasticSearchRequest +import app.softnetwork.elastic.sql.query.SQLSearchRequest +import io.searchbox.core.MultiSearch + +import scala.concurrent.{ExecutionContext, Future} +import scala.jdk.CollectionConverters._ +import scala.language.implicitConversions + +trait JestSearchApi extends SearchApi with JestClientHelpers { + _: JestClientCompanion with SerializationApi => + + private[client] implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = + implicitly[ElasticSearchRequest](sqlSearch).query + + import JestClientApi._ + + override def executeSingleSearch( + elasticQuery: ElasticQuery + ): ElasticResult[Option[String]] = + executeJestAction( + operation = "executeSingleSearch", + index = Some(elasticQuery.indices.mkString(",")), + retryable = true + ) { + elasticQuery.search._1 + }(result => + if (result.isSucceeded) { + Some(result.getJsonString) + } else { + None + } + ) + + private[client] def executeMultiSearch( + elasticQueries: ElasticQueries + ): ElasticResult[Option[String]] = + executeJestAction( + operation = "executeMultiSearch", + index = Some( + elasticQueries.queries + .flatMap(_.indices) + .distinct + .mkString(",") + ), + retryable = true + ) { + new MultiSearch.Builder( + elasticQueries.queries.map(_.search._1).asJava + ).build() + }(result => + if (result.isSucceeded) { + Some(result.getJsonString) + } else { + None + } + ) + + override def executeSingleSearchAsync( + elasticQuery: ElasticQuery + )(implicit ec: ExecutionContext): Future[ElasticResult[Option[String]]] = + executeAsyncJestAction( + operation = "executeSingleSearchAsync", + index = Some(elasticQuery.indices.mkString(",")), + retryable = true + ) { + elasticQuery.search._1 + }(result => + if (result.isSucceeded) { + Some(result.getJsonString) + } else { + None + } + ) + + override private[client] def executeMultiSearchAsync( + elasticQueries: ElasticQueries + )(implicit + ec: ExecutionContext + ): Future[ElasticResult[Option[String]]] = + executeAsyncJestAction( + operation = "executeMultiSearchAsync", + index = Some( + elasticQueries.queries + .flatMap(_.indices) + .distinct + .mkString(",") + ), + retryable = true + ) { + new MultiSearch.Builder( + elasticQueries.queries.map(_.search._1).asJava + ).build() + }(result => + if (result.isSucceeded) { + Some(result.getJsonString) + } else { + None + } + ) + +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestSettingsApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestSettingsApi.scala new file mode 100644 index 00000000..dd29e89e --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestSettingsApi.scala @@ -0,0 +1,65 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.{IndicesApi, SettingsApi} +import app.softnetwork.elastic.client.result.ElasticResult +import io.searchbox.indices.settings.{GetSettings, UpdateSettings} + +/** Settings management API for Jest (Elasticsearch HTTP Client). + * @see + * [[SettingsApi]] for generic API documentation + */ +trait JestSettingsApi extends SettingsApi with JestClientHelpers { + _: IndicesApi with JestClientCompanion => + + /** Update index settings. + * @see + * [[SettingsApi.updateSettings]] + */ + private[client] def executeUpdateSettings( + index: String, + settings: String + ): ElasticResult[Boolean] = + executeJestBooleanAction( + operation = "updateSettings", + index = Some(index), + retryable = true + ) { + new UpdateSettings.Builder(settings).addIndex(index).build() + } + + /** Load the settings of an index. + * @see + * [[SettingsApi.loadSettings]] + */ + private[client] def executeLoadSettings( + index: String + ): ElasticResult[String] = + executeJestAction( + operation = "loadSettings", + index = Some(index), + retryable = true + ) { + new GetSettings.Builder() + .addIndex(index) + .build() + } { result => + result.getJsonString + } + +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestUpdateApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestUpdateApi.scala new file mode 100644 index 00000000..f2355d62 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestUpdateApi.scala @@ -0,0 +1,75 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.{RefreshApi, SerializationApi, UpdateApi} +import app.softnetwork.elastic.client.bulk.docAsUpsert +import app.softnetwork.elastic.client.result.ElasticResult +import io.searchbox.core.Update + +import scala.concurrent.{ExecutionContext, Future} + +/** Update Management API for Jest (Elasticsearch HTTP Client). + * @see + * [[UpdateApi]] for generic API documentation + */ +trait JestUpdateApi extends UpdateApi with JestClientHelpers { + _: RefreshApi with JestClientCompanion with SerializationApi => + + /** Update an entity in the given index. 
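+    *
+    * @example
+    *   Illustrative call shape (index, id and document are placeholders):
+    *   {{{
+    *   executeUpdate("my_index", "42", """{"name": "John"}""", upsert = true)
+    *   }}}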
+ * @see + * [[UpdateApi.updateAs]] + */ + private[client] def executeUpdate( + index: String, + id: String, + source: String, + upsert: Boolean + ): ElasticResult[Boolean] = + executeJestBooleanAction( + operation = "update", + index = Some(index), + retryable = true + ) { + new Update.Builder( + if (upsert) + docAsUpsert(source) + else + source + ).index(index).`type`("_doc").id(id).build() + } + + override private[client] def executeUpdateAsync( + index: String, + id: String, + source: String, + upsert: Boolean + )(implicit ec: ExecutionContext): Future[ElasticResult[Boolean]] = + executeAsyncJestAction( + operation = "update", + index = Some(index), + retryable = true + ) { + new Update.Builder( + if (upsert) + docAsUpsert(source) + else + source + ).index(index).`type`("_doc").id(id).build() + }(result => result.isSucceeded) + +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestVersionApi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestVersionApi.scala new file mode 100644 index 00000000..b389041b --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/jest/JestVersionApi.scala @@ -0,0 +1,40 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.jest + +import app.softnetwork.elastic.client.{result, SerializationApi, VersionApi} +import io.searchbox.core.Cat +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods + +trait JestVersionApi extends VersionApi with JestClientHelpers { + _: SerializationApi with JestClientCompanion => + override private[client] def executeVersion(): result.ElasticResult[String] = + executeJestAction( + "version", + retryable = true + )( + new Cat.NodesBuilder() + .setParameter("h", "version") + .build() + ) { result => + val jsonString = result.getJsonString + implicit val formats: DefaultFormats.type = DefaultFormats + val json = JsonMethods.parse(jsonString) + (json \\ "version").extract[String] + } +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/client/spi/JestClientSpi.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/client/spi/JestClientSpi.scala new file mode 100644 index 00000000..7522f0c5 --- /dev/null +++ b/es6/jest/src/main/scala/app/softnetwork/elastic/client/spi/JestClientSpi.scala @@ -0,0 +1,49 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client.spi + +import app.softnetwork.elastic.client.ElasticClientApi +import app.softnetwork.elastic.client.jest.JestClientApi +import com.typesafe.config.Config + +class JestClientSpi extends ElasticClientSpi { + + //format:off + /** Creates an Elasticsearch client instance. + * + * @param config + * Typesafe configuration containing Elasticsearch parameters + * @return + * Configured ElasticClientApi instance + * + * @example + * {{{ + * class MyElasticClientProvider extends ElasticClientSpi { + * override def client(config: Config): ElasticClientApi = { + * new MyElasticClientImpl(config) + * } + * } + * }}} + */ + //format:on + override def client(conf: Config): ElasticClientApi = { + new JestClientApi { + override lazy val config: Config = conf + } + } + +} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/persistence/query/JestProvider.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/persistence/query/JestProvider.scala deleted file mode 100644 index c806e401..00000000 --- a/es6/jest/src/main/scala/app/softnetwork/elastic/persistence/query/JestProvider.scala +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2025 SOFTNETWORK - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package app.softnetwork.elastic.persistence.query - -import app.softnetwork.elastic.client.jest.JestClientApi -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.model.Timestamped - -/** Created by smanciot on 20/05/2021. - */ -trait JestProvider[T <: Timestamped] extends ElasticProvider[T] with JestClientApi { - _: ManifestWrapper[T] => -} diff --git a/es6/jest/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithJestProvider.scala b/es6/jest/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithJestProvider.scala deleted file mode 100644 index 5501a386..00000000 --- a/es6/jest/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithJestProvider.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2025 SOFTNETWORK - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-package app.softnetwork.elastic.persistence.query
-
-import app.softnetwork.persistence.message.CrudEvent
-import app.softnetwork.persistence.model.Timestamped
-import app.softnetwork.persistence.query.{JournalProvider, OffsetProvider}
-
-trait State2ElasticProcessorStreamWithJestProvider[T <: Timestamped, E <: CrudEvent]
-    extends State2ElasticProcessorStream[T, E]
-    with JestProvider[T] { _: JournalProvider with OffsetProvider => }
diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientCompanionSpec.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientCompanionSpec.scala
new file mode 100644
index 00000000..5baf1f65
--- /dev/null
+++ b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientCompanionSpec.scala
@@ -0,0 +1,122 @@
+package app.softnetwork.elastic.client
+
+import akka.actor.ActorSystem
+import app.softnetwork.elastic.client.jest.JestClientCompanion
+import app.softnetwork.elastic.scalatest.EmbeddedElasticTestKit
+import app.softnetwork.persistence.generateUUID
+import com.typesafe.config.ConfigFactory
+import configs.ConfigReader
+import org.scalatest.concurrent.ScalaFutures
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+import org.slf4j.{Logger, LoggerFactory}
+
+import java.util.concurrent.TimeUnit
+import scala.concurrent.duration.Duration
+import scala.concurrent.{Await, ExecutionContextExecutor, Future}
+import scala.util.Try
+
+class JestClientCompanionSpec
+    extends AnyWordSpec
+    with EmbeddedElasticTestKit
+    with Matchers
+    with ScalaFutures {
+
+  lazy val log: Logger = LoggerFactory getLogger getClass.getName
+
+  implicit val system: ActorSystem = ActorSystem(generateUUID())
+
+  implicit val executionContext: ExecutionContextExecutor = system.dispatcher
+
+  override def afterAll(): Unit = {
+    Await.result(system.terminate(), Duration(30, TimeUnit.SECONDS))
+    super.afterAll()
+  }
+
+  "JestClientCompanion" should {
+
+    "initialize client lazily" in {
+      val companion = TestCompanion()
+      companion.isInitialized shouldBe false
+
+      val client = companion.apply()
+      client should not be null
+      companion.isInitialized shouldBe true
+    }
+
+    "return same instance on multiple calls" in {
+      val companion = TestCompanion()
+      val client1 = companion.apply()
+      val client2 = companion.apply()
+
+      client1 should be theSameInstanceAs client2
+    }
+
+    "be thread-safe during initialization" in {
+      val companion = TestCompanion()
+      val futures = (1 to 100).map { _ =>
+        Future {
+          companion.apply()
+        }
+      }
+
+      val clients = Future.sequence(futures).futureValue
+
+      // All clients must be the same instance
+      clients.distinct.size shouldBe 1
+    }
+
+    "close client properly" in {
+      val companion = TestCompanion()
+      companion.apply()
+      companion.isInitialized shouldBe true
+
+      companion.close()
+      companion.isInitialized shouldBe false
+    }
+
+    "handle invalid URL gracefully" in {
+      val companion = TestCompanion("invalid-url")
+
+      // Initialization may be lazy, so the exception (if any) is tolerated here:
+      // the intent is only that an invalid URL must not crash the suite.
+      Try(an[IllegalArgumentException] should be thrownBy {
+        companion.apply()
+      })
+    }
+
+    "test connection successfully" in {
+      val companion = TestCompanion()
+      companion.testConnection() shouldBe true
+    }
+  }
+
+  case class TestCompanion(config: ElasticConfig) extends JestClientCompanion {
+    override def elasticConfig: ElasticConfig = config
+  }
+
+  object TestCompanion {
+    def apply(): TestCompanion = TestCompanion(
+      ConfigReader[ElasticConfig]
+        .read(elasticConfig.withFallback(ConfigFactory.load("softnetwork-elastic.conf")), "elastic")
+        .toEither match {
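+          // Left: the "elastic" config section could not be read; fail fast with the underlying exception.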
+ case Left(configError) => + throw configError.configException + case Right(r) => r + } + ) + + def apply(url: String): TestCompanion = TestCompanion( + ConfigReader[ElasticConfig] + .read( + ConfigFactory + .parseString(elasticConfigAsString) + .withFallback(ConfigFactory.load("softnetwork-elastic.conf")), + "elastic" + ) + .toEither match { + case Left(configError) => + throw configError.configException + case Right(r) => r.copy(credentials = ElasticCredentials(url)) + } + ) + } +} diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientSpec.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientSpec.scala index 46ce98c9..fbbf9b0e 100644 --- a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientSpec.scala +++ b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestClientSpec.scala @@ -1,28 +1,3 @@ package app.softnetwork.elastic.client -import app.softnetwork.elastic.client.JestProviders.{ - BinaryProvider, - ParentProvider, - PersonProvider, - SampleProvider -} -import app.softnetwork.elastic.model.{Binary, Parent, Sample} -import app.softnetwork.elastic.persistence.query.ElasticProvider -import app.softnetwork.persistence.person.model.Person - -class JestClientSpec extends ElasticClientSpec { - - lazy val pClient: ElasticProvider[Person] with ElasticClientApi = new PersonProvider( - elasticConfig - ) - lazy val sClient: ElasticProvider[Sample] with ElasticClientApi = new SampleProvider( - elasticConfig - ) - lazy val bClient: ElasticProvider[Binary] with ElasticClientApi = new BinaryProvider( - elasticConfig - ) - - override def parentClient: ElasticProvider[Parent] with ElasticClientApi = new ParentProvider( - elasticConfig - ) -} +class JestClientSpec extends ElasticClientSpec diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestProviders.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestProviders.scala deleted file mode 100644 index 36b50b71..00000000 --- a/es6/jest/src/test/scala/app/softnetwork/elastic/client/JestProviders.scala +++ /dev/null @@ -1,47 +0,0 @@ -package app.softnetwork.elastic.client - -import app.softnetwork.elastic.model.{Binary, Parent, Sample} -import app.softnetwork.elastic.persistence.query.JestProvider -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.person.model.Person -import com.typesafe.config.Config -import io.searchbox.client.JestClient - -object JestProviders { - - class PersonProvider(es: Config) extends JestProvider[Person] with ManifestWrapper[Person] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val jestClient: JestClient = - apply(elasticConfig.credentials, elasticConfig.multithreaded) - } - - class SampleProvider(es: Config) extends JestProvider[Sample] with ManifestWrapper[Sample] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val jestClient: JestClient = - apply(elasticConfig.credentials, elasticConfig.multithreaded) - } - - class BinaryProvider(es: Config) extends JestProvider[Binary] with ManifestWrapper[Binary] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val jestClient: JestClient = - apply(elasticConfig.credentials, elasticConfig.multithreaded) - } - - class ParentProvider(es: Config) extends JestProvider[Parent] with ManifestWrapper[Parent] { - override protected 
val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val jestClient: JestClient = - apply(elasticConfig.credentials, elasticConfig.multithreaded) - } -} diff --git a/es6/jest/src/test/scala/app/softnetwork/elastic/persistence/person/JestClientPersonHandlerSpec.scala b/es6/jest/src/test/scala/app/softnetwork/elastic/persistence/person/JestClientPersonHandlerSpec.scala index 9ed5bcfa..1d4d2f29 100644 --- a/es6/jest/src/test/scala/app/softnetwork/elastic/persistence/person/JestClientPersonHandlerSpec.scala +++ b/es6/jest/src/test/scala/app/softnetwork/elastic/persistence/person/JestClientPersonHandlerSpec.scala @@ -1,32 +1,3 @@ package app.softnetwork.elastic.persistence.person -import akka.actor.typed.ActorSystem -import app.softnetwork.elastic.client.jest.JestClientApi -import app.softnetwork.elastic.persistence.query.{ElasticProvider, PersonToElasticProcessorStream} -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.person.model.Person -import app.softnetwork.persistence.person.query.PersonToExternalProcessorStream -import app.softnetwork.persistence.query.ExternalPersistenceProvider -import com.typesafe.config.Config -import org.slf4j.{Logger, LoggerFactory} - -class JestClientPersonHandlerSpec extends ElasticPersonTestKit { - - override def externalPersistenceProvider: ExternalPersistenceProvider[Person] = - new ElasticProvider[Person] with JestClientApi with ManifestWrapper[Person] { - override protected val manifestWrapper: ManifestW = ManifestW() - override lazy val config: Config = JestClientPersonHandlerSpec.this.elasticConfig - } - - override def person2ExternalProcessorStream: ActorSystem[_] => PersonToExternalProcessorStream = - sys => - new PersonToElasticProcessorStream with JestClientApi { - override val forTests: Boolean = true - override protected val manifestWrapper: ManifestW = ManifestW() - override implicit def system: ActorSystem[_] = sys - override def log: Logger = LoggerFactory getLogger getClass.getName - override lazy val config: Config = JestClientPersonHandlerSpec.this.elasticConfig - } - - override def log: Logger = LoggerFactory getLogger getClass.getName -} +class JestClientPersonHandlerSpec extends ElasticClientPersonHandlerSpec diff --git a/es6/rest/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi b/es6/rest/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi new file mode 100644 index 00000000..d24027cc --- /dev/null +++ b/es6/rest/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi @@ -0,0 +1 @@ +app.softnetwork.elastic.client.spi.RestHighLevelClientSpi \ No newline at end of file diff --git a/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala b/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala index 4e56975c..f2947125 100644 --- a/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala +++ b/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala @@ -18,31 +18,40 @@ package app.softnetwork.elastic.client.rest import akka.NotUsed import akka.actor.ActorSystem -import akka.stream.scaladsl.Flow +import akka.stream.scaladsl.{Flow, Source} import app.softnetwork.elastic.client._ -import app.softnetwork.elastic.sql.query.{SQLQuery, SQLSearchRequest} +import app.softnetwork.elastic.client.bulk._ +import 
app.softnetwork.elastic.client.scroll._ +import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLSearchRequest} import app.softnetwork.elastic.sql.bridge._ -import app.softnetwork.elastic.{client, sql} -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.serialization.serialization import com.google.gson.JsonParser +import org.apache.http.util.EntityUtils import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions +import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest import org.elasticsearch.action.admin.indices.close.CloseIndexRequest import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest -import org.elasticsearch.action.admin.indices.flush.FlushRequest +import org.elasticsearch.action.admin.indices.flush.{FlushRequest, FlushResponse} import org.elasticsearch.action.admin.indices.open.OpenIndexRequest -import org.elasticsearch.action.admin.indices.refresh.RefreshRequest -import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest +import org.elasticsearch.action.admin.indices.refresh.{RefreshRequest, RefreshResponse} +import org.elasticsearch.action.admin.indices.settings.get.{GetSettingsRequest, GetSettingsResponse} import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest -import org.elasticsearch.action.bulk.{BulkItemResponse, BulkRequest, BulkResponse} +import org.elasticsearch.action.bulk.{BulkRequest, BulkResponse} import org.elasticsearch.action.delete.{DeleteRequest, DeleteResponse} import org.elasticsearch.action.get.{GetRequest, GetResponse} import org.elasticsearch.action.index.{IndexRequest, IndexResponse} -import org.elasticsearch.action.search.{MultiSearchRequest, SearchRequest, SearchResponse} +import org.elasticsearch.action.search.{ + ClearScrollRequest, + MultiSearchRequest, + MultiSearchResponse, + SearchRequest, + SearchResponse, + SearchScrollRequest +} +import org.elasticsearch.action.support.master.AcknowledgedResponse import org.elasticsearch.action.update.{UpdateRequest, UpdateResponse} import org.elasticsearch.action.{ActionListener, DocWriteRequest} -import org.elasticsearch.client.{Request, RequestOptions} +import org.elasticsearch.client.{GetAliasesResponse, Request, RequestOptions} import org.elasticsearch.client.core.{CountRequest, CountResponse} import org.elasticsearch.client.indices.{ CreateIndexRequest, @@ -50,23 +59,17 @@ import org.elasticsearch.client.indices.{ GetMappingsRequest, PutMappingRequest } -import org.elasticsearch.common.io.stream.InputStreamStreamInput +import org.elasticsearch.common.Strings +import org.elasticsearch.common.unit.TimeValue import org.elasticsearch.common.xcontent.{DeprecationHandler, XContentType} import org.elasticsearch.rest.RestStatus -import org.elasticsearch.search.aggregations.bucket.filter.Filter -import org.elasticsearch.search.aggregations.bucket.nested.Nested -import org.elasticsearch.search.aggregations.metrics.avg.Avg -import org.elasticsearch.search.aggregations.metrics.cardinality.Cardinality -import org.elasticsearch.search.aggregations.metrics.max.Max -import org.elasticsearch.search.aggregations.metrics.min.Min -import org.elasticsearch.search.aggregations.metrics.sum.Sum -import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCount import org.elasticsearch.search.builder.SearchSourceBuilder -import org.json4s.Formats +import org.elasticsearch.search.sort.{FieldSortBuilder, SortOrder} 
+import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods -import java.io.ByteArrayInputStream -//import scala.jdk.CollectionConverters._ -import scala.collection.JavaConverters._ +import java.io.IOException +import scala.jdk.CollectionConverters._ import scala.concurrent.{ExecutionContext, Future, Promise} import scala.language.implicitConversions import scala.util.{Failure, Success, Try} @@ -80,837 +83,750 @@ trait RestHighLevelClientApi with RestHighLevelClientRefreshApi with RestHighLevelClientFlushApi with RestHighLevelClientCountApi - with RestHighLevelClientSingleValueAggregateApi with RestHighLevelClientIndexApi with RestHighLevelClientUpdateApi with RestHighLevelClientDeleteApi with RestHighLevelClientGetApi with RestHighLevelClientSearchApi with RestHighLevelClientBulkApi + with RestHighLevelClientScrollApi + with RestHighLevelClientCompanion + with RestHighLevelClientVersion -trait RestHighLevelClientIndicesApi extends IndicesApi with RestHighLevelClientCompanion { - override def createIndex(index: String, settings: String): Boolean = { - tryOrElse( - apply() - .indices() - .create( - new CreateIndexRequest(index) - .settings(settings, XContentType.JSON), - RequestOptions.DEFAULT - ) - .isAcknowledged, - false - )(logger) - } +/** Version API implementation for RestHighLevelClient + * @see + * [[VersionApi]] for generic API documentation + */ +trait RestHighLevelClientVersion extends VersionApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion with SerializationApi => + override private[client] def executeVersion(): result.ElasticResult[String] = + executeRestLowLevelAction[String]( + operation = "version", + index = None, + retryable = true + )( + request = new Request("GET", "/") + )( + transformer = resp => { + val jsonString = EntityUtils.toString(resp.getEntity) + implicit val formats: DefaultFormats.type = DefaultFormats + val json = JsonMethods.parse(jsonString) + (json \ "version" \ "number").extract[String] + } + ) +} - override def deleteIndex(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .delete(new DeleteIndexRequest(index), RequestOptions.DEFAULT) - .isAcknowledged, - false - )(logger) +/** Indices management API for RestHighLevelClient + * @see + * [[IndicesApi]] for generic API documentation + */ +trait RestHighLevelClientIndicesApi extends IndicesApi with RestHighLevelClientHelpers { + _: RefreshApi with RestHighLevelClientCompanion => + override private[client] def executeCreateIndex( + index: String, + settings: String + ): result.ElasticResult[Boolean] = { + executeRestBooleanAction[CreateIndexRequest, AcknowledgedResponse]( + operation = "createIndex", + index = Some(index), + retryable = false + )( + request = new CreateIndexRequest(index).settings(settings, XContentType.JSON) + )( + executor = req => apply().indices().create(req, RequestOptions.DEFAULT) + ) } - override def openIndex(index: String): Boolean = { - tryOrElse( - apply().indices().open(new OpenIndexRequest(index), RequestOptions.DEFAULT).isAcknowledged, - false - )(logger) - } + override private[client] def executeDeleteIndex(index: String): result.ElasticResult[Boolean] = + executeRestBooleanAction[DeleteIndexRequest, AcknowledgedResponse]( + operation = "deleteIndex", + index = Some(index), + retryable = false + )( + request = new DeleteIndexRequest(index) + )( + executor = req => apply().indices().delete(req, RequestOptions.DEFAULT) + ) - override def closeIndex(index: String): Boolean = { - tryOrElse( - apply().indices().close(new 
CloseIndexRequest(index), RequestOptions.DEFAULT).isAcknowledged, - false - )(logger) - } + override private[client] def executeCloseIndex(index: String): result.ElasticResult[Boolean] = + executeRestBooleanAction[CloseIndexRequest, AcknowledgedResponse]( + operation = "closeIndex", + index = Some(index), + retryable = false + )( + request = new CloseIndexRequest(index) + )( + executor = req => apply().indices().close(req, RequestOptions.DEFAULT) + ) - /** Reindex from source index to target index. - * - * @param sourceIndex - * - the name of the source index - * @param targetIndex - * - the name of the target index - * @param refresh - * - true to refresh the target index after reindexing, false otherwise - * @return - * true if the reindexing was successful, false otherwise - */ - override def reindex(sourceIndex: String, targetIndex: String, refresh: Boolean): Boolean = { - val request = new Request("POST", s"/_reindex?refresh=$refresh") - request.setJsonEntity( - s""" - |{ - | "source": { - | "index": "$sourceIndex" - | }, - | "dest": { - | "index": "$targetIndex" - | } - |} - """.stripMargin + override private[client] def executeOpenIndex(index: String): result.ElasticResult[Boolean] = + executeRestBooleanAction[OpenIndexRequest, AcknowledgedResponse]( + operation = "openIndex", + index = Some(index), + retryable = false + )( + request = new OpenIndexRequest(index) + )( + executor = req => apply().indices().open(req, RequestOptions.DEFAULT) ) - tryOrElse( - apply().getLowLevelClient.performRequest(request).getStatusLine.getStatusCode < 400, - false - )(logger) - } - /** Check if an index exists. - * - * @param index - * - the name of the index to check - * @return - * true if the index exists, false otherwise - */ - override def indexExists(index: String): Boolean = { - tryOrElse( - apply().indices().exists(new GetIndexRequest(index), RequestOptions.DEFAULT), - false - )(logger) - } + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): result.ElasticResult[(Boolean, Option[Long])] = + executeRestAction[Request, org.elasticsearch.client.Response, (Boolean, Option[Long])]( + operation = "reindex", + index = Some(s"$sourceIndex->$targetIndex"), + retryable = false + )( + request = { + val req = new Request("POST", s"/_reindex?refresh=$refresh") + req.setJsonEntity( + s""" + |{ + | "source": { + | "index": "$sourceIndex" + | }, + | "dest": { + | "index": "$targetIndex" + | } + |} + """.stripMargin + ) + req + } + )( + executor = req => apply().getLowLevelClient.performRequest(req) + )(resp => { + resp.getStatusLine match { + case statusLine if statusLine.getStatusCode >= 400 => + (false, None) + case _ => + val json = new JsonParser() + .parse( + scala.io.Source.fromInputStream(resp.getEntity.getContent).mkString + ) + .getAsJsonObject + if (json.has("failures") && json.get("failures").getAsJsonArray.size() > 0) { + (false, None) + } else { + (true, Some(json.get("created").getAsLong)) + } + } + }) + + override private[client] def executeIndexExists(index: String): result.ElasticResult[Boolean] = + executeRestAction[GetIndexRequest, Boolean, Boolean]( + operation = "indexExists", + index = Some(index), + retryable = false + )( + request = new GetIndexRequest(index) + )( + executor = req => apply().indices().exists(req, RequestOptions.DEFAULT) + )( + identity + ) } -trait RestHighLevelClientAliasApi extends AliasApi with RestHighLevelClientCompanion { - override def addAlias(index: String, alias: String): Boolean = { - 
tryOrElse( - apply() - .indices() - .updateAliases( - new IndicesAliasesRequest() - .addAliasAction( - new AliasActions(AliasActions.Type.ADD) - .index(index) - .alias(alias) - ), - RequestOptions.DEFAULT +/** Alias management API for RestHighLevelClient + * @see + * [[AliasApi]] for generic API documentation + */ +trait RestHighLevelClientAliasApi extends AliasApi with RestHighLevelClientHelpers { + _: IndicesApi with RestHighLevelClientCompanion => + + override private[client] def executeAddAlias( + index: String, + alias: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "addAlias", + index = Some(index), + retryable = false + )( + request = new IndicesAliasesRequest() + .addAliasAction( + new AliasActions(AliasActions.Type.ADD) + .index(index) + .alias(alias) ) - .isAcknowledged, - false - )(logger) - } + )( + executor = req => apply().indices().updateAliases(req, RequestOptions.DEFAULT) + ) - override def removeAlias(index: String, alias: String): Boolean = { - tryOrElse( - apply() - .indices() - .updateAliases( - new IndicesAliasesRequest() - .addAliasAction( - new AliasActions(AliasActions.Type.REMOVE) - .index(index) - .alias(alias) - ), - RequestOptions.DEFAULT + override private[client] def executeRemoveAlias( + index: String, + alias: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "removeAlias", + index = Some(index), + retryable = false + )( + request = new IndicesAliasesRequest() + .addAliasAction( + new AliasActions(AliasActions.Type.REMOVE) + .index(index) + .alias(alias) ) - .isAcknowledged, - false - )(logger) - } -} + )( + executor = req => apply().indices().updateAliases(req, RequestOptions.DEFAULT) + ) + + override private[client] def executeAliasExists(alias: String): result.ElasticResult[Boolean] = + executeRestAction[GetAliasesRequest, GetAliasesResponse, Boolean]( + operation = "aliasExists", + index = Some(alias), + retryable = true + )( + request = new GetAliasesRequest().aliases(alias) + )( + executor = req => apply().indices().getAlias(req, RequestOptions.DEFAULT) + )(response => !response.getAliases.isEmpty) -trait RestHighLevelClientSettingsApi extends SettingsApi with RestHighLevelClientCompanion { - _: RestHighLevelClientIndicesApi => - - override def updateSettings(index: String, settings: String): Boolean = { - tryOrElse( - apply() - .indices() - .putSettings( - new UpdateSettingsRequest(index) - .settings(settings, XContentType.JSON), - RequestOptions.DEFAULT + override private[client] def executeGetAliases(index: String): result.ElasticResult[String] = + executeRestAction[GetAliasesRequest, GetAliasesResponse, String]( + operation = "getAliases", + index = Some(index), + retryable = true + )( + request = new GetAliasesRequest().indices(index) + )( + executor = req => apply().indices().getAlias(req, RequestOptions.DEFAULT) + )(response => Strings.toString(response)) + + override private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "swapAlias", + index = Some(s"$oldIndex -> $newIndex"), + retryable = false + )( + request = new IndicesAliasesRequest() + .addAliasAction( + new AliasActions(AliasActions.Type.REMOVE) + .index(oldIndex) + .alias(alias) ) - .isAcknowledged, - false - )(logger) - } + .addAliasAction( + new AliasActions(AliasActions.Type.ADD) + .index(newIndex) + .alias(alias) + ) + )( + executor = req => apply().indices().updateAliases(req, 
RequestOptions.DEFAULT) + ) - override def loadSettings(index: String): String = { - tryOrElse( - { - new JsonParser() - .parse( - apply() - .indices() - .getSettings( - new GetSettingsRequest().indices(index), - RequestOptions.DEFAULT - ) - .toString - ) - .getAsJsonObject - .get(index) - .getAsJsonObject - .get("settings") - .getAsJsonObject - .get("index") - .getAsJsonObject - .toString - }, - "{}" - )(logger) - } } -trait RestHighLevelClientMappingApi extends MappingApi with RestHighLevelClientCompanion { - override def setMapping(index: String, mapping: String): Boolean = { - tryOrElse( - apply() - .indices() - .putMapping( - new PutMappingRequest(index) - .source(mapping, XContentType.JSON), - RequestOptions.DEFAULT - ) - .isAcknowledged, - false - )(logger) - } +/** Settings management API for RestHighLevelClient + * @see + * [[SettingsApi]] for generic API documentation + */ +trait RestHighLevelClientSettingsApi extends SettingsApi with RestHighLevelClientHelpers { + _: IndicesApi with RestHighLevelClientCompanion => - override def getMapping(index: String): String = { - tryOrElse( - apply() - .indices() - .getMapping( - new GetMappingsRequest().indices(index), - RequestOptions.DEFAULT - ) - .mappings() - .asScala - .get(index) - .map(metadata => metadata.source().string()), - None - )(logger).getOrElse(s""""{$index: {"mappings": {}}}""") - } + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "updateSettings", + index = Some(index), + retryable = false + )( + request = new UpdateSettingsRequest(index) + .settings(settings, XContentType.JSON) + )( + executor = req => apply().indices().putSettings(req, RequestOptions.DEFAULT) + ) - override def getMappingProperties(index: String): String = { - tryOrElse( - getMapping(index), - "{\"properties\": {}}" - )(logger) - } + override private[client] def executeLoadSettings(index: String): result.ElasticResult[String] = + executeRestAction[GetSettingsRequest, GetSettingsResponse, String]( + operation = "loadSettings", + index = Some(index), + retryable = true + )( + request = new GetSettingsRequest().indices(index) + )( + executor = req => apply().indices().getSettings(req, RequestOptions.DEFAULT) + )(response => response.toString) } -trait RestHighLevelClientRefreshApi extends RefreshApi with RestHighLevelClientCompanion { - override def refresh(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .refresh( - new RefreshRequest(index), - RequestOptions.DEFAULT - ) - .getStatus - .getStatus < 400, - false - )(logger) - } -} +/** Mapping API implementation for RestHighLevelClient + * @see + * [[MappingApi]] for generic API documentation + */ +trait RestHighLevelClientMappingApi extends MappingApi with RestHighLevelClientHelpers { + _: SettingsApi with IndicesApi with RefreshApi with RestHighLevelClientCompanion => + override private[client] def executeSetMapping( + index: String, + mapping: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "setMapping", + index = Some(index), + retryable = false + )( + request = new PutMappingRequest(index) + .source(mapping, XContentType.JSON) + )( + executor = req => apply().indices().putMapping(req, RequestOptions.DEFAULT) + ) + + override private[client] def executeGetMapping(index: String): result.ElasticResult[String] = + executeRestAction[ + GetMappingsRequest, + org.elasticsearch.client.indices.GetMappingsResponse, + String + ]( + 
operation = "getMapping", + index = Some(index), + retryable = true + )( + request = new GetMappingsRequest().indices(index) + )( + executor = req => apply().indices().getMapping(req, RequestOptions.DEFAULT) + )(response => { + val mappings = response.mappings().asScala.get(index) + mappings match { + case Some(metadata) => metadata.source().toString + case None => s"""{"properties": {}}""" + } + }) -trait RestHighLevelClientFlushApi extends FlushApi with RestHighLevelClientCompanion { - override def flush(index: String, force: Boolean = true, wait: Boolean = true): Boolean = { - tryOrElse( - apply() - .indices() - .flush( - new FlushRequest(index).force(force).waitIfOngoing(wait), - RequestOptions.DEFAULT - ) - .getStatus == RestStatus.OK, - false - )(logger) - } } -trait RestHighLevelClientCountApi extends CountApi with RestHighLevelClientCompanion { - override def countAsync( - query: client.JSONQuery - )(implicit ec: ExecutionContext): Future[Option[Double]] = { - val promise = Promise[Option[Double]]() - apply().countAsync( - new CountRequest().indices(query.indices: _*).types(query.types: _*), - RequestOptions.DEFAULT, - new ActionListener[CountResponse] { - override def onResponse(response: CountResponse): Unit = - promise.success(Option(response.getCount.toDouble)) - - override def onFailure(e: Exception): Unit = promise.failure(e) - } - ) - promise.future - } +/** Refresh API implementation for RestHighLevelClient + * @see + * [[RefreshApi]] for generic API documentation + */ +trait RestHighLevelClientRefreshApi extends RefreshApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion => + override private[client] def executeRefresh(index: String): result.ElasticResult[Boolean] = + executeRestAction[RefreshRequest, RefreshResponse, Boolean]( + operation = "refresh", + index = Some(index), + retryable = true + )( + request = new RefreshRequest(index) + )( + executor = req => apply().indices().refresh(req, RequestOptions.DEFAULT) + )(response => response.getStatus.getStatus < 400) - override def count(query: client.JSONQuery): Option[Double] = { - tryOrElse( - Option( - apply() - .count( - new CountRequest().indices(query.indices: _*).types(query.types: _*), - RequestOptions.DEFAULT - ) - .getCount - .toDouble - ), - None - )(logger) - } } -trait RestHighLevelClientSingleValueAggregateApi - extends SingleValueAggregateApi - with RestHighLevelClientCountApi { - override def aggregate( - sqlQuery: SQLQuery - )(implicit ec: ExecutionContext): Future[Seq[SingleValueAggregateResult]] = { - val aggregations: Seq[ElasticAggregation] = sqlQuery - val futures = for (aggregation <- aggregations) yield { - val promise: Promise[SingleValueAggregateResult] = Promise() - val field = aggregation.field - val sourceField = aggregation.sourceField - val aggType = aggregation.aggType - val aggName = aggregation.aggName - val query = aggregation.query.getOrElse("") - val sources = aggregation.sources - sourceField match { - case "_id" if aggType.sql == "count" => - countAsync( - JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ) - ).onComplete { - case Success(result) => - promise.success( - SingleValueAggregateResult( - field, - aggType, - result.map(r => NumericValue(r.doubleValue())).getOrElse(EmptyValue), - None - ) - ) - case Failure(f) => - logger.error(f.getMessage, f.fillInStackTrace()) - promise.success( - SingleValueAggregateResult(field, aggType, EmptyValue, Some(f.getMessage)) - ) - } - promise.future - case _ => - val 
jsonQuery = JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ) - import jsonQuery._ - // Create a parser for the query - val xContentParser = XContentType.JSON - .xContent() - .createParser( - namedXContentRegistry, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - jsonQuery.query - ) - apply().searchAsync( - new SearchRequest(indices: _*) - .types(types: _*) - .source( - SearchSourceBuilder.fromXContent(xContentParser) - ), - RequestOptions.DEFAULT, - new ActionListener[SearchResponse] { - override def onResponse(response: SearchResponse): Unit = { - val agg = aggName.split("\\.").last - - val itAgg = aggName.split("\\.").iterator - - var root = - if (aggregation.nested) { - response.getAggregations.get(itAgg.next()).asInstanceOf[Nested].getAggregations - } else { - response.getAggregations - } - - if (aggregation.filtered) { - root = root.get(itAgg.next()).asInstanceOf[Filter].getAggregations - } +/** Flush API implementation for RestHighLevelClient + * @see + * [[FlushApi]] for generic API documentation + */ +trait RestHighLevelClientFlushApi extends FlushApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion => + override private[client] def executeFlush( + index: String, + force: Boolean, + wait: Boolean + ): result.ElasticResult[Boolean] = + executeRestAction[FlushRequest, FlushResponse, Boolean]( + operation = "flush", + index = Some(index), + retryable = true + )( + request = new FlushRequest(index).force(force).waitIfOngoing(wait) + )( + executor = req => apply().indices().flush(req, RequestOptions.DEFAULT) + )(response => response.getStatus == RestStatus.OK) - promise.success( - SingleValueAggregateResult( - field, - aggType, - aggType match { - case sql.function.aggregate.COUNT => - if (aggregation.distinct) { - NumericValue(root.get(agg).asInstanceOf[Cardinality].value()) - } else { - NumericValue(root.get(agg).asInstanceOf[ValueCount].value()) - } - case sql.function.aggregate.SUM => - NumericValue(root.get(agg).asInstanceOf[Sum].value()) - case sql.function.aggregate.AVG => - NumericValue(root.get(agg).asInstanceOf[Avg].value()) - case sql.function.aggregate.MIN => - NumericValue(root.get(agg).asInstanceOf[Min].value()) - case sql.function.aggregate.MAX => - NumericValue(root.get(agg).asInstanceOf[Max].value()) - case _ => EmptyValue - }, - None - ) - ) - } +} - override def onFailure(e: Exception): Unit = promise.failure(e) - } - ) - promise.future - } - } - Future.sequence(futures) +/** Count API implementation for RestHighLevelClient + * @see + * [[CountApi]] for generic API documentation + */ +trait RestHighLevelClientCountApi extends CountApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion => + override private[client] def executeCount( + query: ElasticQuery + ): result.ElasticResult[Option[Double]] = + executeRestAction[CountRequest, CountResponse, Option[Double]]( + operation = "count", + index = Some(query.indices.mkString(",")), + retryable = true + )( + request = new CountRequest().indices(query.indices: _*).types(query.types: _*) + )( + executor = req => apply().count(req, RequestOptions.DEFAULT) + )(response => Option(response.getCount.toDouble)) + + override private[client] def executeCountAsync( + query: ElasticQuery + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[Double]]] = { + executeAsyncRestAction[CountRequest, CountResponse, Option[Double]]( + operation = "countAsync", + index = Some(query.indices.mkString(",")), + retryable = true + )( + 
request = new CountRequest().indices(query.indices: _*).types(query.types: _*) + )( + executor = (req, listener) => apply().countAsync(req, RequestOptions.DEFAULT, listener) + )(response => Option(response.getCount.toDouble)) } + } -trait RestHighLevelClientIndexApi extends IndexApi with RestHighLevelClientCompanion { - _: RestHighLevelClientRefreshApi => - override def index(index: String, id: String, source: String): Boolean = { - tryOrElse( - apply() - .index( - new IndexRequest(index) - .`type`("_doc") - .id(id) - .source(source, XContentType.JSON), - RequestOptions.DEFAULT - ) - .status() - .getStatus < 400, - false - )(logger) - } +/** Index API implementation for RestHighLevelClient + * @see + * [[IndexApi]] for generic API documentation + */ +trait RestHighLevelClientIndexApi extends IndexApi with RestHighLevelClientHelpers { + _: RefreshApi with RestHighLevelClientCompanion with SerializationApi => + override private[client] def executeIndex( + index: String, + id: String, + source: String + ): result.ElasticResult[Boolean] = + executeRestAction[IndexRequest, IndexResponse, Boolean]( + operation = "index", + index = Some(index), + retryable = false + )( + request = new IndexRequest(index) + .`type`("_doc") + .id(id) + .source(source, XContentType.JSON) + )( + executor = req => apply().index(req, RequestOptions.DEFAULT) + )(response => response.status().getStatus < 400) - override def indexAsync(index: String, id: String, source: String)(implicit + override private[client] def executeIndexAsync(index: String, id: String, source: String)(implicit ec: ExecutionContext - ): Future[Boolean] = { - val promise: Promise[Boolean] = Promise() - apply().indexAsync( - new IndexRequest(index) + ): Future[result.ElasticResult[Boolean]] = + executeAsyncRestAction[IndexRequest, IndexResponse, Boolean]( + operation = "indexAsync", + index = Some(index), + retryable = false + )( + request = new IndexRequest(index) .`type`("_doc") .id(id) - .source(source, XContentType.JSON), - RequestOptions.DEFAULT, - new ActionListener[IndexResponse] { - override def onResponse(response: IndexResponse): Unit = - promise.success(response.status().getStatus < 400) + .source(source, XContentType.JSON) + )( + executor = (req, listener) => apply().indexAsync(req, RequestOptions.DEFAULT, listener) + )(response => response.status().getStatus < 400) - override def onFailure(e: Exception): Unit = promise.failure(e) - } - ) - promise.future - } } -trait RestHighLevelClientUpdateApi extends UpdateApi with RestHighLevelClientCompanion { - _: RestHighLevelClientRefreshApi => - override def update( +/** Update API implementation for RestHighLevelClient + * @see + * [[UpdateApi]] for generic API documentation + */ +trait RestHighLevelClientUpdateApi extends UpdateApi with RestHighLevelClientHelpers { + _: RefreshApi with RestHighLevelClientCompanion with SerializationApi => + override private[client] def executeUpdate( index: String, id: String, source: String, upsert: Boolean - ): Boolean = { - tryOrElse( - apply() - .update( - new UpdateRequest(index, "_doc", id) - .doc(source, XContentType.JSON) - .docAsUpsert(upsert), - RequestOptions.DEFAULT - ) - .status() - .getStatus < 400, - false - )(logger) - } + ): result.ElasticResult[Boolean] = + executeRestAction[UpdateRequest, UpdateResponse, Boolean]( + operation = "update", + index = Some(index), + retryable = false + )( + request = new UpdateRequest(index, "_doc", id) + .doc(source, XContentType.JSON) + .docAsUpsert(upsert) + )( + executor = req => apply().update(req, 
RequestOptions.DEFAULT) + )(response => response.status().getStatus < 400) - override def updateAsync( + override private[client] def executeUpdateAsync( index: String, id: String, source: String, upsert: Boolean - )(implicit ec: ExecutionContext): Future[Boolean] = { - val promise: Promise[Boolean] = Promise() - apply().updateAsync( - new UpdateRequest(index, "_doc", id) + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Boolean]] = + executeAsyncRestAction[UpdateRequest, UpdateResponse, Boolean]( + operation = "updateAsync", + index = Some(index), + retryable = false + )( + request = new UpdateRequest(index, "_doc", id) .doc(source, XContentType.JSON) - .docAsUpsert(upsert), - RequestOptions.DEFAULT, - new ActionListener[UpdateResponse] { - override def onResponse(response: UpdateResponse): Unit = - promise.success(response.status().getStatus < 400) + .docAsUpsert(upsert) + )( + executor = (req, listener) => apply().updateAsync(req, RequestOptions.DEFAULT, listener) + )(response => response.status().getStatus < 400) - override def onFailure(e: Exception): Unit = promise.failure(e) - } - ) - promise.future - } } -trait RestHighLevelClientDeleteApi extends DeleteApi with RestHighLevelClientCompanion { - _: RestHighLevelClientRefreshApi => +/** Delete API implementation for RestHighLevelClient + * @see + * [[DeleteApi]] for generic API documentation + */ +trait RestHighLevelClientDeleteApi extends DeleteApi with RestHighLevelClientHelpers { + _: RefreshApi with RestHighLevelClientCompanion => - override def delete(uuid: String, index: String): Boolean = { - tryOrElse( - apply() - .delete( - new DeleteRequest(index, "_doc", uuid), - RequestOptions.DEFAULT - ) - .status() - .getStatus < 400, - false - )(logger) - } + override private[client] def executeDelete( + index: String, + id: String + ): result.ElasticResult[Boolean] = + executeRestAction[DeleteRequest, DeleteResponse, Boolean]( + operation = "delete", + index = Some(index), + retryable = false + )( + request = new DeleteRequest(index, "_doc", id) + )( + executor = req => apply().delete(req, RequestOptions.DEFAULT) + )(response => response.status().getStatus < 400) - override def deleteAsync(uuid: String, index: String)(implicit + override private[client] def executeDeleteAsync(index: String, id: String)(implicit ec: ExecutionContext - ): Future[Boolean] = { - val promise: Promise[Boolean] = Promise() - apply().deleteAsync( - new DeleteRequest(index, "_doc", uuid), - RequestOptions.DEFAULT, - new ActionListener[DeleteResponse] { - override def onResponse(response: DeleteResponse): Unit = - promise.success(response.status().getStatus < 400) - - override def onFailure(e: Exception): Unit = promise.failure(e) - } - ) - promise.future - } + ): Future[result.ElasticResult[Boolean]] = + executeAsyncRestAction[DeleteRequest, DeleteResponse, Boolean]( + operation = "deleteAsync", + index = Some(index), + retryable = false + )( + request = new DeleteRequest(index, "_doc", id) + )( + executor = (req, listener) => apply().deleteAsync(req, RequestOptions.DEFAULT, listener) + )(response => response.status().getStatus < 400) + } -trait RestHighLevelClientGetApi extends GetApi with RestHighLevelClientCompanion { - def get[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], formats: Formats): Option[U] = { - Try( - apply().get( - new GetRequest( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ), - "_doc", - id - ), - 
RequestOptions.DEFAULT - ) - ) match { - case Success(response) => - if (response.isExists) { - val source = response.getSourceAsString - logger.info(s"Deserializing response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}") - // Deserialize the source string to the expected type - // Note: This assumes that the source is a valid JSON representation of U - // and that the serialization library is capable of handling it. - Try(serialization.read[U](source)) match { - case Success(value) => Some(value) - case Failure(f) => - logger.error( - s"Failed to deserialize response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) - None - } - } else { - None - } - case Failure(f) => - logger.error( - s"Failed to get document with id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) +/** Get API implementation for RestHighLevelClient + * @see + * [[GetApi]] for generic API documentation + */ +trait RestHighLevelClientGetApi extends GetApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion with SerializationApi => + override private[client] def executeGet( + index: String, + id: String + ): result.ElasticResult[Option[String]] = + executeRestAction[GetRequest, GetResponse, Option[String]]( + operation = "get", + index = Some(index), + retryable = true + )( + request = new GetRequest(index, "_doc", id) + )( + executor = req => apply().get(req, RequestOptions.DEFAULT) + )(response => { + if (response.isExists) { + Some(response.getSourceAsString) + } else { None - } - } - - override def getAsync[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[Option[U]] = { - val promise = Promise[Option[U]]() - apply().getAsync( - new GetRequest( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ), - "_doc", - id - ), - RequestOptions.DEFAULT, - new ActionListener[GetResponse] { - override def onResponse(response: GetResponse): Unit = { - if (response.isExists) { - promise.success(Some(serialization.read[U](response.getSourceAsString))) - } else { - promise.success(None) - } - } + } + }) - override def onFailure(e: Exception): Unit = promise.failure(e) + override private[client] def executeGetAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[result.ElasticResult[Option[String]]] = + executeAsyncRestAction[GetRequest, GetResponse, Option[String]]( + operation = "getAsync", + index = Some(index), + retryable = true + )( + request = new GetRequest(index, "_doc", id) + )( + executor = (req, listener) => apply().getAsync(req, RequestOptions.DEFAULT, listener) + )(response => { + if (response.isExists) { + Some(response.getSourceAsString) + } else { + None } - ) - promise.future - } + }) + } -trait RestHighLevelClientSearchApi extends SearchApi with RestHighLevelClientCompanion { +/** Search API implementation for RestHighLevelClient + * @see + * [[SearchApi]] for generic API documentation + */ +trait RestHighLevelClientSearchApi extends SearchApi with RestHighLevelClientHelpers { + _: ElasticConversion with RestHighLevelClientCompanion with SerializationApi => + override implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = implicitly[ElasticSearchRequest](sqlSearch).query - override def search[U]( - jsonQuery: JSONQuery - )(implicit m: 
Manifest[U], formats: Formats): List[U] = { - import jsonQuery._ - logger.info(s"Searching with query: $query on indices: ${indices.mkString(", ")}") - // Create a parser for the query - val xContentParser = XContentType.JSON - .xContent() - .createParser( - namedXContentRegistry, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - query - ) - val response = apply().search( - new SearchRequest(indices: _*) - .types(types: _*) - .source( - SearchSourceBuilder.fromXContent(xContentParser) - ), - RequestOptions.DEFAULT - ) - if (response.getHits.getTotalHits > 0) { - response.getHits.getHits.toList.map { hit => - logger.info(s"Deserializing hit: ${hit.getSourceAsString}") - serialization.read[U](hit.getSourceAsString) + override private[client] def executeSingleSearch( + elasticQuery: ElasticQuery + ): result.ElasticResult[Option[String]] = + executeRestAction[SearchRequest, SearchResponse, Option[String]]( + operation = "singleSearch", + index = Some(elasticQuery.indices.mkString(",")), + retryable = true + )( + request = { + val req = new SearchRequest(elasticQuery.indices: _*).types(elasticQuery.types: _*) + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + elasticQuery.query + ) + req.source(SearchSourceBuilder.fromXContent(xContentParser)) + req } - } else { - List.empty[U] - } - } + )( + executor = req => apply().search(req, RequestOptions.DEFAULT) + )(response => { + if (response.status() == RestStatus.OK) { + Some(Strings.toString(response)) + } else { + None + } + }) - override def searchAsync[U]( - sqlQuery: SQLQuery - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[List[U]] = { - val jsonQuery: JSONQuery = sqlQuery - import jsonQuery._ - val promise = Promise[List[U]]() - logger.info(s"Searching with query: $query on indices: ${indices.mkString(", ")}") - // Create a parser for the query - val xContentParser = XContentType.JSON - .xContent() - .createParser( - namedXContentRegistry, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - query - ) - // Execute the search asynchronously - apply().searchAsync( - new SearchRequest(indices: _*) - .types(types: _*) - .source( - SearchSourceBuilder.fromXContent(xContentParser) - ), - RequestOptions.DEFAULT, - new ActionListener[SearchResponse] { - override def onResponse(response: SearchResponse): Unit = { - if (response.getHits.getTotalHits > 0) { - promise.success(response.getHits.getHits.toList.map { hit => - serialization.read[U](hit.getSourceAsString) - }) - } else { - promise.success(List.empty[U]) - } + override private[client] def executeMultiSearch( + elasticQueries: ElasticQueries + ): result.ElasticResult[Option[String]] = + executeRestAction[MultiSearchRequest, MultiSearchResponse, Option[String]]( + operation = "multiSearch", + index = Some( + elasticQueries.queries + .flatMap(_.indices) + .distinct + .mkString(",") + ), + retryable = true + )( + request = { + val req = new MultiSearchRequest() + for (query <- elasticQueries.queries) { + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + query.query + ) + val searchSourceBuilder = SearchSourceBuilder.fromXContent(xContentParser) + req.add( + new SearchRequest(query.indices: _*) + .types(query.types: _*) + .source(searchSourceBuilder) + ) } - - override def onFailure(e: Exception): Unit = promise.failure(e) + req } - ) - promise.future - } - - override def 
searchWithInnerHits[U, I](jsonQuery: JSONQuery, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[(U, List[I])] = { - import jsonQuery._ - // Create a parser for the query - val xContentParser = XContentType.JSON - .xContent() - .createParser( - namedXContentRegistry, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - jsonQuery.query - ) - val response = apply().search( - new SearchRequest(indices: _*) - .types(types: _*) - .source( - SearchSourceBuilder.fromXContent(xContentParser) - ), - RequestOptions.DEFAULT - ) - Try(new JsonParser().parse(response.toString).getAsJsonObject ~> [U, I] innerField) match { - case Success(s) => s - case Failure(f) => - logger.error(f.getMessage, f) - List.empty - } - } + )( + executor = req => apply().msearch(req, RequestOptions.DEFAULT) + )(response => Some(Strings.toString(response))) - override def multiSearch[U]( - jsonQueries: JSONQueries - )(implicit m: Manifest[U], formats: Formats): List[List[U]] = { - import jsonQueries._ - val request = new MultiSearchRequest() - for (query <- queries) { - request.add( - new SearchRequest(query.indices: _*) - .types(query.types: _*) - .source( - new SearchSourceBuilder( - new InputStreamStreamInput( - new ByteArrayInputStream( - query.query.getBytes() - ) - ) - ) + override private[client] def executeSingleSearchAsync( + elasticQuery: ElasticQuery + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[String]]] = + executeAsyncRestAction[SearchRequest, SearchResponse, Option[String]]( + operation = "executeSingleSearchAsync", + index = Some(elasticQuery.indices.mkString(",")), + retryable = true + )( + request = { + val req = new SearchRequest(elasticQuery.indices: _*).types(elasticQuery.types: _*) + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + elasticQuery.query ) - ) - } - val responses = apply().msearch(request, RequestOptions.DEFAULT) - responses.getResponses.toList.map { response => - if (response.isFailure) { - logger.error(s"Error in multi search: ${response.getFailureMessage}") - List.empty[U] + req.source(SearchSourceBuilder.fromXContent(xContentParser)) + req + } + )( + executor = (req, listener) => apply().searchAsync(req, RequestOptions.DEFAULT, listener) + )(response => { + if (response.status() == RestStatus.OK) { + Some(Strings.toString(response)) } else { - response.getResponse.getHits.getHits.toList.map { hit => - serialization.read[U](hit.getSourceAsString) - } + None } - } - } + }) - override def multiSearchWithInnerHits[U, I](jsonQueries: JSONQueries, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[List[(U, List[I])]] = { - import jsonQueries._ - val request = new MultiSearchRequest() - for (query <- queries) { - request.add( - new SearchRequest(query.indices: _*) - .types(query.types: _*) - .source( - new SearchSourceBuilder( - new InputStreamStreamInput( - new ByteArrayInputStream( - query.query.getBytes() - ) - ) + override private[client] def executeMultiSearchAsync( + elasticQueries: ElasticQueries + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[String]]] = + executeAsyncRestAction[MultiSearchRequest, MultiSearchResponse, Option[String]]( + operation = "executeMultiSearchAsync", + index = Some( + elasticQueries.queries + .flatMap(_.indices) + .distinct + .mkString(",") + ), + retryable = true + )( + request = { + val req = new MultiSearchRequest() + for 
(query <- elasticQueries.queries) { + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + query.query ) + val searchSourceBuilder = SearchSourceBuilder.fromXContent(xContentParser) + req.add( + new SearchRequest(query.indices: _*) + .types(query.types: _*) + .source(searchSourceBuilder) ) - ) - } - val responses = apply().msearch(request, RequestOptions.DEFAULT) - responses.getResponses.toList.map { response => - if (response.isFailure) { - logger.error(s"Error in multi search: ${response.getFailureMessage}") - List.empty[(U, List[I])] - } else { - Try( - new JsonParser().parse(response.getResponse.toString).getAsJsonObject ~> [U, I] innerField - ) match { - case Success(s) => s - case Failure(f) => - logger.error(f.getMessage, f) - List.empty } + req } - } - } + )( + executor = (req, listener) => apply().msearchAsync(req, RequestOptions.DEFAULT, listener) + )(response => Some(Strings.toString(response))) } -trait RestHighLevelClientBulkApi - extends RestHighLevelClientRefreshApi - with RestHighLevelClientSettingsApi - with RestHighLevelClientIndicesApi - with BulkApi { - override type A = DocWriteRequest[_] - override type R = BulkResponse +/** Bulk API implementation for RestHighLevelClient + * @see + * [[BulkApi]] for generic API documentation + */ +trait RestHighLevelClientBulkApi extends BulkApi with RestHighLevelClientHelpers { + _: RefreshApi with SettingsApi with IndexApi with RestHighLevelClientCompanion => - override def toBulkAction(bulkItem: BulkItem): A = { - import bulkItem._ - val request = action match { - case BulkAction.UPDATE => - val r = new UpdateRequest(index, null, if (id.isEmpty) null else id.get) - .doc(body, XContentType.JSON) - .docAsUpsert(true) - parent.foreach(r.parent) - r - case BulkAction.DELETE => - val r = new DeleteRequest(index).id(id.getOrElse("_all")) - parent.foreach(r.parent) - r - case _ => - val r = new IndexRequest(index).source(body, XContentType.JSON) - id.foreach(r.id) - parent.foreach(r.parent) - r + override type BulkActionType = DocWriteRequest[_] + override type BulkResultType = BulkResponse + + override implicit def toBulkElasticAction(a: BulkActionType): BulkElasticAction = { + new BulkElasticAction { + override def index: String = a.index } - request } - override def bulkResult: Flow[R, Set[String], NotUsed] = - Flow[BulkResponse] - .named("result") - .map(result => { - val items = result.getItems - val grouped = items.groupBy(_.getIndex) - val indices = grouped.keys.toSet - for (index <- indices) { - logger - .info(s"Bulk operation succeeded for index $index with ${grouped(index).length} items.") - } - indices - }) - - override def bulk(implicit + /** Basic flow for executing a bulk action. This method must be implemented by concrete classes + * depending on the Elasticsearch version and client used. 
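+ * A minimal usage sketch (hypothetical wiring: the `items: Seq[BulkItem]` source and the implicit `ActorSystem`/`BulkOptions` are assumed to be in scope): + * {{{ + * val actions: Seq[DocWriteRequest[_]] = items.map(toBulkAction) + * Source.single(actions).via(bulkFlow).runWith(Sink.ignore) + * }}}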
+ * + * @param bulkOptions + * configuration options + * @return + * Flow transforming bulk actions into results + */ + override private[client] def bulkFlow(implicit bulkOptions: BulkOptions, system: ActorSystem - ): Flow[Seq[A], R, NotUsed] = { + ): Flow[Seq[BulkActionType], BulkResultType, NotUsed] = { val parallelism = Math.max(1, bulkOptions.balance) - Flow[Seq[A]] + Flow[Seq[BulkActionType]] .named("bulk") - .mapAsyncUnordered[R](parallelism) { items => + .mapAsyncUnordered[BulkResultType](parallelism) { items => - val request = new BulkRequest(bulkOptions.index, bulkOptions.documentType) + val request = new BulkRequest(bulkOptions.defaultIndex, bulkOptions.defaultType) items.foreach(request.add) - val promise: Promise[R] = Promise[R]() + val promise: Promise[BulkResultType] = Promise[BulkResultType]() apply().bulkAsync( @@ -936,21 +852,457 @@ trait RestHighLevelClientBulkApi } } - private[this] def toBulkElasticResultItem(i: BulkItemResponse): BulkElasticResultItem = - new BulkElasticResultItem { - override def index: String = i.getIndex + /** Converts a bulk response (BulkResultType) into individual per-document results, extracting + * the successes and failures from the ES response. + * + * @param result + * raw result from the bulk request + * @return + * sequence of Right(SuccessfulDocument) for each success or Left(FailedDocument) for each + * failure + */ + override private[client] def extractBulkResults( + result: BulkResultType, + originalBatch: Seq[BulkItem] + ): Seq[Either[FailedDocument, SuccessfulDocument]] = { + // no results at all + if ( + originalBatch.nonEmpty && + (result == null || (result.getItems == null || result.getItems.isEmpty)) + ) { + logger.error("Bulk result is null or has no items") + return originalBatch.map { item => + Left( + FailedDocument( + id = item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = BulkError( + message = "Null bulk result", + `type` = "internal_error", + status = 500 + ), + retryable = false + ) + ) + } + } + + // process failed items + val failedItems = result.getItems.filter(_.isFailed).map { item => + val failure = item.getFailure + val statusCode = item.status().getStatus + val errorType = Option(failure.getType).getOrElse("unknown") + val errorReason = Option(failure.getMessage).getOrElse("Unknown error") + + val itemId = item.getId + val itemIndex = item.getIndex + + val originalItemOpt = originalBatch + .find(o => o.id.contains(itemId) && o.index == itemIndex) + + // Determine if the error is retryable + val isRetryable = originalItemOpt.isDefined && (BulkErrorAnalyzer.isRetryable(statusCode) || + BulkErrorAnalyzer.isRetryableByType(errorType)) + + val originalItem = originalItemOpt.getOrElse( + BulkItem( + index = itemIndex, + id = Some(itemId), + document = "", + parent = None, + action = item.getOpType match { + case DocWriteRequest.OpType.INDEX => BulkAction.INDEX + case DocWriteRequest.OpType.CREATE => BulkAction.INDEX + case DocWriteRequest.OpType.UPDATE => BulkAction.UPDATE + case DocWriteRequest.OpType.DELETE => BulkAction.DELETE + } + ) + ) + + Left( + FailedDocument( + id = originalItem.id.getOrElse("unknown"), + index = originalItem.index, + document = originalItem.document, + error = BulkError( + message = errorReason, + `type` = errorType, + status = statusCode + ), + retryable = isRetryable + ) + ) } - override implicit def toBulkElasticAction(a: DocWriteRequest[_]): BulkElasticAction = { - new BulkElasticAction { - override def index: String = a.index + // process successful items + val successfulItems = + result.getItems.filterNot(_.isFailed).map { item => + Right(SuccessfulDocument(id = item.getId, index = item.getIndex)) + } + + val results = failedItems ++ successfulItems + + // if no individual results but overall failure, mark all as failed + if (results.isEmpty && originalBatch.nonEmpty) { + val statusCode = result.status().getStatus + val errorString = result.buildFailureMessage() + logger.error(s"Bulk operation completed with errors: $errorString") + val bulkError = + BulkError( + message = errorString, + `type` = "unknown", + status = statusCode + ) + return originalBatch.map { item => + Left( + FailedDocument( + id = item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = bulkError, + retryable = BulkErrorAnalyzer.isRetryable(statusCode) + ) + ) + } + } + + results + }
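+ + // Hedged illustration (hypothetical data): for a two-item batch where the first item is + // indexed successfully and the second is rejected with HTTP 429, this method would yield + // Seq(Right(SuccessfulDocument("1", "people")), Left(FailedDocument("2", "people", ..., retryable = true))), + // assuming BulkErrorAnalyzer classifies status 429 as transient.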
+ + override def toBulkAction(bulkItem: BulkItem): BulkActionType = { + import bulkItem._ + val request = action match { + case BulkAction.UPDATE => + val r = new UpdateRequest(bulkItem.index, null, if (id.isEmpty) null else id.get) + .doc(document, XContentType.JSON) + .docAsUpsert(true) + parent.foreach(r.parent) + r + case BulkAction.DELETE => + val r = new DeleteRequest(bulkItem.index).id(id.getOrElse("_all")) + parent.foreach(r.parent) + r + case _ => + val r = new IndexRequest(bulkItem.index).source(document, XContentType.JSON) + id.foreach(r.id) + parent.foreach(r.parent) + r + } + request + } + + /** Converts a BulkActionType back into a BulkItem. */ + override private[client] def actionToBulkItem(action: BulkActionType): BulkItem = { + action match { + case req: IndexRequest => + BulkItem( + index = req.index(), + id = Option(req.id()), + document = req.source().utf8ToString(), + parent = Option(req.parent()), + action = BulkAction.INDEX + ) + case req: UpdateRequest => + BulkItem( + index = req.index(), + id = Option(req.id()), + document = req.doc().source().utf8ToString(), + parent = Option(req.parent()), + action = BulkAction.UPDATE + ) + case req: DeleteRequest => + BulkItem( + index = req.index(), + id = Option(req.id()), + document = "", + parent = Option(req.parent()), + action = BulkAction.DELETE + ) + case _ => + throw new IllegalArgumentException( + s"Unsupported BulkActionType: ${action.getClass.getName}" + ) + } + } + +} + +/** Scroll API implementation for RestHighLevelClient + * @see + * [[ScrollApi]] for generic API documentation + */ +trait RestHighLevelClientScrollApi extends ScrollApi with RestHighLevelClientHelpers { + _: VersionApi with SearchApi with RestHighLevelClientCompanion => + + /** Classic scroll (works for both hits and aggregations) + */ + override private[client] def scrollClassic( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation], + config: ScrollConfig + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + Source + .unfoldAsync[Option[String], Seq[Map[String, Any]]](None) { scrollIdOpt => + retryWithBackoff(config.retryConfig) { + Future { + scrollIdOpt match { + case None => + // Initial search with scroll + logger.info( + s"Starting classic scroll on indices: ${elasticQuery.indices.mkString(", ")}" + ) + + val query = elasticQuery.query + // Create a parser for the query + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + query + ) + // Execute the search + val searchRequest = + new SearchRequest(elasticQuery.indices: _*) + .types(elasticQuery.types: _*) + .source( + SearchSourceBuilder.fromXContent(xContentParser).size(config.scrollSize) + ) + + searchRequest.scroll( +
TimeValue.parseTimeValue(config.keepAlive, "scroll_timeout") + ) + + val response = apply().search(searchRequest, RequestOptions.DEFAULT) + + if (response.status() != RestStatus.OK) { + throw new IOException(s"Initial scroll failed with status: ${response.status()}") + } + + val scrollId = response.getScrollId + + if (scrollId == null) { + throw new IllegalStateException("Scroll ID is null in response") + } + + // Extract both hits AND aggregations + val results = extractAllResults(response, fieldAliases, aggregations) + + logger.info(s"Initial scroll returned ${results.size} results, scrollId: $scrollId") + + if (results.isEmpty) { + None + } else { + Some((Some(scrollId), results)) + } + + case Some(scrollId) => + // Subsequent scroll requests + logger.debug(s"Fetching next scroll batch (scrollId: $scrollId)") + + val scrollRequest = new SearchScrollRequest(scrollId) + scrollRequest.scroll( + TimeValue.parseTimeValue(config.keepAlive, "scroll_timeout") + ) + + val result = apply().scroll(scrollRequest, RequestOptions.DEFAULT) + + if (result.status() != RestStatus.OK) { + clearScroll(scrollId) + throw new IOException( + s"Scroll continuation failed with status: ${result.status()}" + ) + } + + val newScrollId = result.getScrollId + val results = extractAllResults(result, fieldAliases, aggregations) + + logger.debug(s"Scroll returned ${results.size} results") + + if (results.isEmpty) { + clearScroll(scrollId) + None + } else { + Some((Some(newScrollId), results)) + } + } + } + }(system, logger).recover { case ex: Exception => + logger.error(s"Scroll failed after retries: ${ex.getMessage}", ex) + scrollIdOpt.foreach(clearScroll) + None + } + } + .mapConcat(identity) + } + + /** Search After (only for hits, more efficient) + * @note + * Uses Array[Object] for searchAfter values to match RestHighLevelClient API + */ + override private[client] def searchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + Source + .unfoldAsync[Option[Array[Object]], Seq[Map[String, Any]]](None) { searchAfterOpt => + retryWithBackoff(config.retryConfig) { + Future { + searchAfterOpt match { + case None => + logger.info( + s"Starting search_after on indices: ${elasticQuery.indices.mkString(", ")}" + ) + case Some(values) => + logger.debug(s"Fetching next search_after batch (after: ${if (values.length > 3) + s"[${values.take(3).mkString(", ")}...]" + else values.mkString(", ")})") + } + + val query = elasticQuery.query + // Create a parser for the query + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + query + ) + val sourceBuilder = + SearchSourceBuilder.fromXContent(xContentParser).size(config.scrollSize) + + // Check if sorts already exist in the query + if (!hasSorts && sourceBuilder.sorts() == null) { + logger.warn( + "No sort fields in query for search_after, adding default _id sort. " + + "This may lead to inconsistent results if documents are updated during scroll." 
+ ) + sourceBuilder.sort("_id", SortOrder.ASC) + } else if (hasSorts && sourceBuilder.sorts() != null) { + // Sorts already present, check that a tie-breaker exists + val hasIdSort = sourceBuilder.sorts().asScala.exists { sortBuilder => + sortBuilder match { + case fieldSort: FieldSortBuilder => + fieldSort.getFieldName == "_id" + case _ => + false + } + } + if (!hasIdSort) { + // Add _id as tie-breaker + logger.debug("Adding _id as tie-breaker to existing sorts") + sourceBuilder.sort("_id", SortOrder.ASC) + } + } + + // Add search_after if available + searchAfterOpt.foreach { searchAfter => + sourceBuilder.searchAfter(searchAfter) + } + + // Execute the search + val searchRequest = + new SearchRequest(elasticQuery.indices: _*) + .types(elasticQuery.types: _*) + .source( + sourceBuilder + ) + + val response = apply().search(searchRequest, RequestOptions.DEFAULT) + + if (response.status() != RestStatus.OK) { + throw new IOException(s"Search after failed with status: ${response.status()}") + } + + // Extract ONLY hits (no aggregations for search_after) + val hits = extractHitsOnly(response, fieldAliases) + + if (hits.isEmpty) { + None + } else { + val searchHits = response.getHits.getHits + val lastHit = searchHits.last + val nextSearchAfter = Option(lastHit.getSortValues) + + logger.debug( + s"Retrieved ${hits.size} hits, next search_after: ${nextSearchAfter + .map(arr => + if (arr.length > 3) s"[${arr.take(3).mkString(", ")}...]" + else arr.mkString(", ") + ) + .getOrElse("None")}" + ) + + Some((nextSearchAfter, hits)) + } + } + }(system, logger).recover { case ex: Exception => + logger.error(s"Search after failed after retries: ${ex.getMessage}", ex) + None + } + } + .mapConcat(identity) + } + + override private[client] def pitSearchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = + throw new NotImplementedError("PIT search after not implemented for Elasticsearch 6") + + /** Extract ALL results: hits + aggregations This is crucial for queries with aggregations + */ + private def extractAllResults( + response: SearchResponse, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): Seq[Map[String, Any]] = { + val jsonString = response.toString + val sqlResponse = + ElasticResponse("", jsonString, fieldAliases, aggregations.map(kv => kv._1 -> kv._2)) + + parseResponse(sqlResponse) match { + case Success(rows) => + logger.debug(s"Parsed ${rows.size} rows from response") + rows + case Failure(ex) => + logger.error(s"Failed to parse scroll response: ${ex.getMessage}", ex) + Seq.empty + } + } + + /** Extract ONLY hits (for search_after optimization) + */ + private def extractHitsOnly( + response: SearchResponse, + fieldAliases: Map[String, String] + ): Seq[Map[String, Any]] = { + val jsonString = response.toString + val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) + + parseResponse(sqlResponse) match { + case Success(rows) => rows + case Failure(ex) => + logger.error(s"Failed to parse search after response: ${ex.getMessage}", ex) + Seq.empty } } - override implicit def toBulkElasticResult(r: BulkResponse): BulkElasticResult = { - new BulkElasticResult { - override def items: List[BulkElasticResultItem] = - r.getItems.toList.map(toBulkElasticResultItem) + private def clearScroll(scrollId: String): Unit = { + Try { + logger.debug(s"Clearing scroll: $scrollId") + val clearScrollRequest = new 
ClearScrollRequest() + clearScrollRequest.addScrollId(scrollId) + apply().clearScroll(clearScrollRequest, RequestOptions.DEFAULT) + }.recover { case ex: Exception => + logger.warn(s"Failed to clear scroll $scrollId: ${ex.getMessage}") } } } diff --git a/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientCompanion.scala b/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientCompanion.scala index cb3098d7..6461a13b 100644 --- a/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientCompanion.scala +++ b/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientCompanion.scala @@ -16,56 +16,94 @@ package app.softnetwork.elastic.client.rest -import app.softnetwork.elastic.client.ElasticConfig -import com.sksamuel.exts.Logging -import org.apache.http.HttpHost +import app.softnetwork.elastic.client.ElasticClientCompanion +import org.elasticsearch.client.{RequestOptions, RestClient, RestClientBuilder, RestHighLevelClient} import org.apache.http.auth.{AuthScope, UsernamePasswordCredentials} import org.apache.http.impl.client.BasicCredentialsProvider -import org.apache.http.impl.nio.client.HttpAsyncClientBuilder -import org.elasticsearch.client.{RestClient, RestClientBuilder, RestHighLevelClient} +import org.elasticsearch.search.SearchModule import org.elasticsearch.common.settings.Settings import org.elasticsearch.common.xcontent.NamedXContentRegistry import org.elasticsearch.plugins.SearchPlugin -import org.elasticsearch.search.SearchModule +import org.slf4j.{Logger, LoggerFactory} -import scala.collection.JavaConverters._ +import scala.jdk.CollectionConverters._ +import scala.util.{Failure, Success, Try} -trait RestHighLevelClientCompanion extends Logging { +/** Thread-safe companion for RestHighLevelClient with lazy initialization and proper resource + * management + */ +trait RestHighLevelClientCompanion extends ElasticClientCompanion[RestHighLevelClient] { - def elasticConfig: ElasticConfig - - private var client: Option[RestHighLevelClient] = None + val logger: Logger = LoggerFactory getLogger getClass.getName + /** Lazy-initialized NamedXContentRegistry (thread-safe via Scala's lazy val) + */ lazy val namedXContentRegistry: NamedXContentRegistry = { - // import scala.jdk.CollectionConverters._ val searchModule = new SearchModule(Settings.EMPTY, false, List.empty[SearchPlugin].asJava) new NamedXContentRegistry(searchModule.getNamedXContents) } - def apply(): RestHighLevelClient = { - client match { - case Some(c) => c - case _ => + /** Creates and configures the RestHighLevelClient. Separated out for better testability and + * error handling. + */ + override protected def createClient(): RestHighLevelClient = { + try { + val restClientBuilder = buildRestClient() + new RestHighLevelClient(restClientBuilder) + } catch { + case ex: Exception => + logger.error(s"Failed to create RestHighLevelClient: ${ex.getMessage}", ex) + throw new IllegalStateException("Cannot create Elasticsearch client", ex) + } + } + + /** Build RestClientBuilder with credentials and configuration + */ + private def buildRestClient(): RestClientBuilder = { + val httpHost = parseHttpHost(elasticConfig.credentials.url) + + val builder = RestClient + .builder(httpHost) + .setRequestConfigCallback { requestConfigBuilder => + requestConfigBuilder + .setConnectTimeout(elasticConfig.connectionTimeout.toMillis.toInt) + .setSocketTimeout(elasticConfig.socketTimeout.toMillis.toInt) + } + + // Add credentials if provided + if
(elasticConfig.credentials.username.nonEmpty) { + builder.setHttpClientConfigCallback { httpClientBuilder => val credentialsProvider = new BasicCredentialsProvider() - if (elasticConfig.credentials.username.nonEmpty) { - credentialsProvider.setCredentials( - AuthScope.ANY, - new UsernamePasswordCredentials( - elasticConfig.credentials.username, - elasticConfig.credentials.password - ) - ) - } - val restClientBuilder: RestClientBuilder = RestClient - .builder( - HttpHost.create(elasticConfig.credentials.url) + credentialsProvider.setCredentials( + AuthScope.ANY, + new UsernamePasswordCredentials( + elasticConfig.credentials.username, + elasticConfig.credentials.password ) - .setHttpClientConfigCallback((httpAsyncClientBuilder: HttpAsyncClientBuilder) => - httpAsyncClientBuilder.setDefaultCredentialsProvider(credentialsProvider) - ) - val c = new RestHighLevelClient(restClientBuilder) - client = Some(c) - c + ) + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider) + } + } else { + builder } } + + /** Test connection to Elasticsearch cluster + * @return + * true if connection is successful + */ + override def testConnection(): Boolean = { + Try { + val c = apply() + val response = c.info(RequestOptions.DEFAULT) + logger.info(s"Connected to Elasticsearch ${response.getVersion}") + true + } match { + case Success(result) => result + case Failure(ex) => + logger.error(s"Connection test failed: ${ex.getMessage}", ex) + incrementFailures() + false + } + } + } diff --git a/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientHelpers.scala b/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientHelpers.scala new file mode 100644 index 00000000..afa5f20e --- /dev/null +++ b/es6/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientHelpers.scala @@ -0,0 +1,449 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.rest + +import app.softnetwork.elastic.client.ElasticClientHelpers +import app.softnetwork.elastic.client.result.{ElasticError, ElasticResult} + +import scala.concurrent.Promise +import scala.util.{Failure, Success, Try} + +trait RestHighLevelClientHelpers extends ElasticClientHelpers { _: RestHighLevelClientCompanion => + + // ======================================================================== + // GENERIC METHODS FOR EXECUTING REST HIGH LEVEL CLIENT ACTIONS + // ======================================================================== + + //format:off + /** Execute a Rest High Level Client action with a generic transformation of the result. 
+ * + * @tparam Req + * type of the request + * @tparam Resp + * type of the response + * @tparam T + * type of the desired final result + * @param operation + * name of the operation (for logging and error context) + * @param index + * relevant index (optional, for logging) + * @param retryable + * true if the operation can be retried in case of a transient error + * @param request + * the request to be executed + * @param executor + * function executing the request and returning the response + * @param transformer + * function transforming the response into T + * @return + * ElasticResult[T] + * + * @example + * {{{ + * executeRestAction[CreateIndexRequest, CreateIndexResponse, Boolean]( + * operation = "createIndex", + * index = Some("my-index"), + * retryable = false + * )( + * request = new CreateIndexRequest("my-index") + * )( + * executor = req => apply().indices().create(req, RequestOptions.DEFAULT) + * )( + * transformer = resp => resp.isAcknowledged + * ) + * }}} + */ + //format:on + private[client] def executeRestAction[Req, Resp, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => Req + )( + executor: Req => Resp + )( + transformer: Resp => T + ): ElasticResult[T] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + logger.debug(s"Executing operation '$operation'$indexStr") + + // ✅ Execution with exception handling + val tryResult: Try[Resp] = Try { + executor(request) + } + + // ✅ Conversion to ElasticResult[Resp] + val elasticResult: ElasticResult[Resp] = tryResult match { + case Success(result) => + ElasticResult.success(result) + case Failure(ex: org.elasticsearch.ElasticsearchException) => + // Extract status code from Elasticsearch exception + val statusCode = Option(ex.status()).map(_.getStatus) + logger.error( + s"Elasticsearch exception during operation '$operation'$indexStr: ${ex.getMessage}", + ex + ) + ElasticResult.failure( + ElasticError( + message = s"Elasticsearch error during $operation: ${ex.getDetailedMessage}", + cause = Some(ex), + statusCode = statusCode, + operation = Some(operation) + ) + ) + case Failure(ex) => + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + ElasticResult.failure( + ElasticError( + message = s"Exception during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + + // ✅ Apply transformation + elasticResult.flatMap { result => + Try(transformer(result)) match { + case Success(transformed) => + logger.debug(s"Operation '$operation'$indexStr succeeded") + ElasticResult.success(transformed) + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + ElasticResult.failure( + ElasticError( + message = s"Failed to transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + } + } + + /** Simplified variant for operations returning Boolean values (acknowledged). 
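+ * A hedged sketch (hypothetical request, mirroring the `executeRestAction` example above and assuming the ES 6.x `indices().delete` endpoint): + * {{{ + * executeRestBooleanAction[DeleteIndexRequest, AcknowledgedResponse]( + * operation = "deleteIndex", + * index = Some("my-index"), + * retryable = false + * )( + * request = new DeleteIndexRequest("my-index") + * )( + * executor = req => apply().indices().delete(req, RequestOptions.DEFAULT) + * ) + * }}}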
+ * + * @param operation + * name of the operation + * @param index + * index concerned (optional) + * @param retryable + * true if retryable + * @param request + * the request to be executed + * @param executor + * function executing the request + * @return + * ElasticResult[Boolean] + */ + private[client] def executeRestBooleanAction[ + Req, + Resp <: org.elasticsearch.action.support.master.AcknowledgedResponse + ]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => Req + )( + executor: Req => Resp + ): ElasticResult[Boolean] = { + executeRestAction[Req, Resp, Boolean](operation, index, retryable)(request)(executor)( + _.isAcknowledged + ) + } + + //format:off + /** Variant to execute an action using the low-level REST client. Useful for operations not + * supported by the high-level client. + * + * @tparam T + * type of the final result + * @param operation + * name of the operation + * @param index + * index concerned (optional) + * @param retryable + * true if retryable + * @param request + * the low-level Request + * @param transformer + * function transforming the Response into T + * @return + * ElasticResult[T] + * + * @example + * {{{ + * executeRestLowLevelAction[String]( + * operation = "customEndpoint", + * index = Some("my-index") + * )( + * request = new Request("GET", "/my-index/_custom") + * )( + * transformer = resp => EntityUtils.toString(resp.getEntity) + * ) + * }}} + */ + //format:on + private[client] def executeRestLowLevelAction[T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => org.elasticsearch.client.Request + )( + transformer: org.elasticsearch.client.Response => T + ): ElasticResult[T] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + logger.debug(s"Executing low-level operation '$operation'$indexStr") + + // ✅ Execution with exception handling + val tryResult: Try[org.elasticsearch.client.Response] = Try { + apply().getLowLevelClient.performRequest(request) + } + + // ✅ Conversion to ElasticResult[Response] + val elasticResult: ElasticResult[org.elasticsearch.client.Response] = tryResult match { + case Success(result) => + ElasticResult.success(result) + case Failure(ex: org.elasticsearch.client.ResponseException) => + val statusCode = Some(ex.getResponse.getStatusLine.getStatusCode) + logger.error( + s"Response exception during operation '$operation'$indexStr: ${ex.getMessage}", + ex + ) + ElasticResult.failure( + ElasticError( + message = s"HTTP error during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = statusCode, + operation = Some(operation) + ) + ) + case Failure(ex) => + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + ElasticResult.failure( + ElasticError( + message = s"Exception during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + + // ✅ Check status and apply transformation + elasticResult.flatMap { result => + val statusCode = result.getStatusLine.getStatusCode + + if (statusCode >= 200 && statusCode < 300) { + // ✅ Success: applying the transformation + Try(transformer(result)) match { + case Success(transformed) => + logger.debug(s"Operation '$operation'$indexStr succeeded with status $statusCode") + ElasticResult.success(transformed) + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + ElasticResult.failure( + ElasticError( + message = s"Failed to 
transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = Some(statusCode), + operation = Some(operation) + ) + ) + } + } else { + // ✅ Failure: extract the error + val errorMessage = Option(result.getStatusLine.getReasonPhrase) + .filter(_.nonEmpty) + .getOrElse("Unknown error") + + val error = ElasticError( + message = errorMessage, + cause = None, + statusCode = Some(statusCode), + operation = Some(operation) + ) + + logError(operation, indexStr, error) + ElasticResult.failure(error) + } + } + } + + //format:off + /** Asynchronous variant to execute a Rest High Level Client action. + * + * @tparam Req + * type of the request + * @tparam Resp + * type of the response + * @tparam T + * type of the desired final result + * @param operation + * name of the operation + * @param index + * relevant index (optional) + * @param retryable + * true if retryable + * @param request + * the request to be executed + * @param executor + * function executing the request asynchronously + * @param transformer + * function transforming the response into T + * @return + * Future[ElasticResult[T]] + * + * @example + * {{{ + * executeAsyncRestAction[IndexRequest, IndexResponse, String]( + * operation = "indexDocument", + * index = Some("my-index") + * )( + * request = new IndexRequest("my-index").source(...) + * )( + * executor = (req, listener) => apply().indexAsync(req, RequestOptions.DEFAULT, listener) + * )( + * transformer = resp => resp.getId + * ) + * }}} + */ + //format:on + private[client] def executeAsyncRestAction[Req, Resp, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => Req + )( + executor: (Req, org.elasticsearch.action.ActionListener[Resp]) => Unit + )( + transformer: Resp => T + )(implicit ec: scala.concurrent.ExecutionContext): scala.concurrent.Future[ElasticResult[T]] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + logger.debug(s"Executing operation '$operation'$indexStr asynchronously") + + val promise: Promise[ElasticResult[T]] = Promise() + + try { + val listener = new org.elasticsearch.action.ActionListener[Resp] { + override def onResponse(response: Resp): Unit = { + logger.debug(s"Operation '$operation'$indexStr succeeded asynchronously") + + // ✅ Success: applying the transformation + Try(transformer(response)) match { + case Success(transformed) => + promise.success(ElasticResult.success(transformed)) + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + promise.success( + ElasticResult.failure( + ElasticError( + message = s"Failed to transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + ) + } + } + + override def onFailure(ex: Exception): Unit = { + val (message, statusCode) = ex match { + case esEx: org.elasticsearch.ElasticsearchException => + ( + s"Elasticsearch error during $operation: ${esEx.getDetailedMessage}", + Option(esEx.status()).map(_.getStatus) + ) + case _ => + (s"Exception during $operation: ${ex.getMessage}", None) + } + + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + + promise.success( + ElasticResult.failure( + ElasticError( + message = message, + cause = Some(ex), + statusCode = statusCode, + operation = Some(operation) + ) + ) + ) + } + } + + executor(request, listener) + } catch { + case ex: Exception => + logger.error(s"Failed to initiate async operation '$operation'$indexStr", ex) + promise.success( + 
ElasticResult.failure( + ElasticError( + message = s"Failed to initiate $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + ) + } + + promise.future + } + + /** Simplified asynchronous variant for operations returning Boolean values. + * + * @param operation + * name of the operation + * @param index + * index concerned (optional) + * @param retryable + * true if retryable + * @param request + * the request to be executed + * @param executor + * function executing the request asynchronously + * @return + * Future of ElasticResult[Boolean] + */ + private[client] def executeAsyncRestBooleanAction[ + Req, + Resp <: org.elasticsearch.action.support.master.AcknowledgedResponse + ]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => Req + )( + executor: (Req, org.elasticsearch.action.ActionListener[Resp]) => Unit + )(implicit + ec: scala.concurrent.ExecutionContext + ): scala.concurrent.Future[ElasticResult[Boolean]] = { + executeAsyncRestAction[Req, Resp, Boolean](operation, index, retryable)(request)(executor)( + _.isAcknowledged + ) + } +} diff --git a/es6/rest/src/main/scala/app/softnetwork/elastic/client/spi/RestHighLevelClientSpi.scala b/es6/rest/src/main/scala/app/softnetwork/elastic/client/spi/RestHighLevelClientSpi.scala new file mode 100644 index 00000000..9f17cb37 --- /dev/null +++ b/es6/rest/src/main/scala/app/softnetwork/elastic/client/spi/RestHighLevelClientSpi.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.spi + +import app.softnetwork.elastic.client.ElasticClientApi +import app.softnetwork.elastic.client.rest.RestHighLevelClientApi +import com.typesafe.config.Config + +class RestHighLevelClientSpi extends ElasticClientSpi { + + //format:off + /** Creates an Elasticsearch client instance. 
+ * + * @param conf + * Typesafe configuration containing Elasticsearch parameters + * @return + * Configured ElasticClientApi instance + * @example + * {{{ + * class MyElasticClientProvider extends ElasticClientSpi { + * override def client(config: Config): ElasticClientApi = { + * new MyElasticClientImpl(config) + * } + * } + * }}} + */ + //format:on + override def client(conf: Config): ElasticClientApi = + new RestHighLevelClientApi { + override def config: Config = conf + } +} diff --git a/es6/rest/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithRestProvider.scala b/es6/rest/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithRestProvider.scala deleted file mode 100644 index 2e6eee32..00000000 --- a/es6/rest/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithRestProvider.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2025 SOFTNETWORK - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package app.softnetwork.elastic.persistence.query - -import app.softnetwork.persistence.message.CrudEvent -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.persistence.query.{JournalProvider, OffsetProvider} - -trait State2ElasticProcessorStreamWithRestProvider[T <: Timestamped, E <: CrudEvent] - extends State2ElasticProcessorStream[T, E] - with RestHighLevelClientProvider[T] { _: JournalProvider with OffsetProvider => } diff --git a/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientCompanionSpec.scala b/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientCompanionSpec.scala new file mode 100644 index 00000000..b467f8a3 --- /dev/null +++ b/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientCompanionSpec.scala @@ -0,0 +1,119 @@ +package app.softnetwork.elastic.client + +import akka.actor.ActorSystem +import app.softnetwork.elastic.client.rest.RestHighLevelClientCompanion +import app.softnetwork.elastic.scalatest.EmbeddedElasticTestKit +import app.softnetwork.persistence.generateUUID +import com.typesafe.config.ConfigFactory +import configs.ConfigReader +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.concurrent.ScalaFutures +import org.slf4j.{Logger, LoggerFactory} + +import java.util.concurrent.TimeUnit +import scala.concurrent.duration.Duration +import scala.concurrent.{Await, ExecutionContextExecutor, Future} +import scala.util.Try + +class RestHighLevelClientCompanionSpec + extends AnyWordSpec + with EmbeddedElasticTestKit + with Matchers + with ScalaFutures { + + lazy val log: Logger = LoggerFactory getLogger getClass.getName + + implicit val system: ActorSystem = ActorSystem(generateUUID()) + + implicit val executionContext: ExecutionContextExecutor = system.dispatcher + + override def afterAll(): Unit = { + Await.result(system.terminate(), Duration(30, TimeUnit.SECONDS)) + super.afterAll() + } + + 
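// The companion is expected to create its RestHighLevelClient lazily on first apply(), cache that single instance across threads, and drop it on close(); the cases below exercise each of those guarantees in turn. + +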
"RestHighLevelClientCompanion" should { + + "initialize client lazily" in { + val companion = TestCompanion() + companion.isInitialized shouldBe false + + val client = companion.apply() + client should not be null + companion.isInitialized shouldBe true + } + + "return same instance on multiple calls" in { + val companion = TestCompanion() + val client1 = companion.apply() + val client2 = companion.apply() + + client1 should be theSameInstanceAs client2 + } + + "be thread-safe during initialization" in { + val companion = TestCompanion() + val futures = (1 to 100).map { _ => + Future { companion.apply() } + } + + val clients = Future.sequence(futures).futureValue + + // Tous les clients doivent être la même instance + clients.distinct.size shouldBe 1 + } + + "close client properly" in { + val companion = TestCompanion() + companion.apply() + companion.isInitialized shouldBe true + + companion.close() + companion.isInitialized shouldBe false + } + + "handle invalid URL gracefully" in { + val companion = TestCompanion("invalid-url") + + Try(an[IllegalArgumentException] should be thrownBy { + companion.apply() + }) + } + + "test connection successfully" in { + val companion = TestCompanion() + companion.testConnection() shouldBe true + } + } + + case class TestCompanion(config: ElasticConfig) extends RestHighLevelClientCompanion { + override def elasticConfig: ElasticConfig = config + } + + object TestCompanion { + def apply(): TestCompanion = TestCompanion( + ConfigReader[ElasticConfig] + .read(elasticConfig.withFallback(ConfigFactory.load("softnetwork-elastic.conf")), "elastic") + .toEither match { + case Left(configError) => + throw configError.configException + case Right(r) => r + } + ) + def apply(url: String): TestCompanion = TestCompanion( + ConfigReader[ElasticConfig] + .read( + ConfigFactory + .parseString(elasticConfigAsString) + .withFallback(ConfigFactory.load("softnetwork-elastic.conf")), + "elastic" + ) + .toEither match { + case Left(configError) => + throw configError.configException + case Right(r) => r.copy(credentials = ElasticCredentials(url)) + } + ) + } +} diff --git a/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientSpec.scala b/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientSpec.scala index b8aeba67..9ea35a61 100644 --- a/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientSpec.scala +++ b/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientSpec.scala @@ -1,28 +1,3 @@ package app.softnetwork.elastic.client -import app.softnetwork.elastic.client.RestHighLevelProviders.{ - BinaryProvider, - ParentProvider, - PersonProvider, - SampleProvider -} -import app.softnetwork.elastic.model.{Binary, Parent, Sample} -import app.softnetwork.elastic.persistence.query.ElasticProvider -import app.softnetwork.persistence.person.model.Person - -class RestHighLevelClientSpec extends ElasticClientSpec { - - lazy val pClient: ElasticProvider[Person] with ElasticClientApi = new PersonProvider( - elasticConfig - ) - lazy val sClient: ElasticProvider[Sample] with ElasticClientApi = new SampleProvider( - elasticConfig - ) - lazy val bClient: ElasticProvider[Binary] with ElasticClientApi = new BinaryProvider( - elasticConfig - ) - - override def parentClient: ElasticProvider[Parent] with ElasticClientApi = new ParentProvider( - elasticConfig - ) -} +class RestHighLevelClientSpec extends ElasticClientSpec diff --git a/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelProviders.scala 
b/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelProviders.scala deleted file mode 100644 index 4356c541..00000000 --- a/es6/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelProviders.scala +++ /dev/null @@ -1,51 +0,0 @@ -package app.softnetwork.elastic.client - -import app.softnetwork.elastic.model.{Binary, Parent, Sample} -import app.softnetwork.elastic.persistence.query.RestHighLevelClientProvider -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.person.model.Person -import com.typesafe.config.Config -import org.elasticsearch.client.RestHighLevelClient - -object RestHighLevelProviders { - - class PersonProvider(es: Config) - extends RestHighLevelClientProvider[Person] - with ManifestWrapper[Person] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val restHighLevelClient: RestHighLevelClient = apply() - } - - class SampleProvider(es: Config) - extends RestHighLevelClientProvider[Sample] - with ManifestWrapper[Sample] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val restHighLevelClient: RestHighLevelClient = apply() - } - - class BinaryProvider(es: Config) - extends RestHighLevelClientProvider[Binary] - with ManifestWrapper[Binary] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val restHighLevelClient: RestHighLevelClient = apply() - } - - class ParentProvider(es: Config) - extends RestHighLevelClientProvider[Parent] - with ManifestWrapper[Parent] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val restHighLevelClient: RestHighLevelClient = apply() - } -} diff --git a/es6/rest/src/test/scala/app/softnetwork/elastic/persistence/person/RestHighLevelClientPersonHandlerSpec.scala b/es6/rest/src/test/scala/app/softnetwork/elastic/persistence/person/RestHighLevelClientPersonHandlerSpec.scala index 8c5e86d9..35f469f8 100644 --- a/es6/rest/src/test/scala/app/softnetwork/elastic/persistence/person/RestHighLevelClientPersonHandlerSpec.scala +++ b/es6/rest/src/test/scala/app/softnetwork/elastic/persistence/person/RestHighLevelClientPersonHandlerSpec.scala @@ -1,32 +1,3 @@ package app.softnetwork.elastic.persistence.person -import akka.actor.typed.ActorSystem -import app.softnetwork.elastic.client.rest.RestHighLevelClientApi -import app.softnetwork.elastic.persistence.query.{ElasticProvider, PersonToElasticProcessorStream} -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.person.model.Person -import app.softnetwork.persistence.person.query.PersonToExternalProcessorStream -import app.softnetwork.persistence.query.ExternalPersistenceProvider -import com.typesafe.config.Config -import org.slf4j.{Logger, LoggerFactory} - -class RestHighLevelClientPersonHandlerSpec extends ElasticPersonTestKit { - - override def externalPersistenceProvider: ExternalPersistenceProvider[Person] = - new ElasticProvider[Person] with RestHighLevelClientApi with ManifestWrapper[Person] { - override protected val manifestWrapper: ManifestW = ManifestW() - override lazy val config: Config = RestHighLevelClientPersonHandlerSpec.this.elasticConfig - } - - override def person2ExternalProcessorStream: ActorSystem[_] => PersonToExternalProcessorStream = - sys => - new PersonToElasticProcessorStream with 
RestHighLevelClientApi { - override val forTests: Boolean = true - override protected val manifestWrapper: ManifestW = ManifestW() - override implicit def system: ActorSystem[_] = sys - override def log: Logger = LoggerFactory getLogger getClass.getName - override lazy val config: Config = RestHighLevelClientPersonHandlerSpec.this.elasticConfig - } - - override def log: Logger = LoggerFactory getLogger getClass.getName -} +class RestHighLevelClientPersonHandlerSpec extends ElasticClientPersonHandlerSpec diff --git a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticBridge.scala similarity index 93% rename from es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala rename to es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticBridge.scala index 2b3c9d6b..187bf278 100644 --- a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala +++ b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticBridge.scala @@ -41,7 +41,7 @@ import com.sksamuel.elastic4s.searches.queries.{InnerHit, Query} import scala.annotation.tailrec -case class ElasticQuery(filter: ElasticFilter) { +case class ElasticBridge(filter: ElasticFilter) { def query( innerHitsNames: Set[String] = Set.empty, currentQuery: Option[ElasticBoolQuery] @@ -50,9 +50,9 @@ case class ElasticQuery(filter: ElasticFilter) { case boolQuery: ElasticBoolQuery => import boolQuery._ bool( - mustFilters.map(implicitly[ElasticQuery](_).query(innerHitsNames, currentQuery)), - shouldFilters.map(implicitly[ElasticQuery](_).query(innerHitsNames, currentQuery)), - notFilters.map(implicitly[ElasticQuery](_).query(innerHitsNames, currentQuery)) + mustFilters.map(implicitly[ElasticBridge](_).query(innerHitsNames, currentQuery)), + shouldFilters.map(implicitly[ElasticBridge](_).query(innerHitsNames, currentQuery)), + notFilters.map(implicitly[ElasticBridge](_).query(innerHitsNames, currentQuery)) ) .filter(innerFilters.map(_.query(innerHitsNames, currentQuery))) case nested: ElasticNested => @@ -115,12 +115,12 @@ case class ElasticQuery(filter: ElasticFilter) { case p: Predicate if nestedTrees.size > 1 => val leftNested = ElasticNested(p.leftCriteria, p.leftCriteria.limit) val leftBoolQuery = Option(ElasticBoolQuery(group = true)) - val leftQuery = ElasticQuery(leftNested) + val leftQuery = ElasticBridge(leftNested) .query(innerHitsNames /*++ leftNested.innerHitsName.toSet*/, leftBoolQuery) val rightNested = ElasticNested(p.rightCriteria, p.rightCriteria.limit) val rightBoolQuery = Option(ElasticBoolQuery(group = true)) - val rightQuery = ElasticQuery(rightNested) + val rightQuery = ElasticBridge(rightNested) .query(innerHitsNames /*++ rightNested.innerHitsName.toSet*/, rightBoolQuery) p.operator match { diff --git a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index c13ee796..a08d5068 100644 --- a/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/es6/sql-bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -825,8 +825,8 @@ package object bridge { implicit def filterToQuery( filter: ElasticFilter - ): ElasticQuery = { - ElasticQuery(filter) + ): ElasticBridge = { + ElasticBridge(filter) } @deprecated diff --git a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala 
b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index d1a4a4ee..88c3501e 100644 --- a/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/es6/sql-bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -6,8 +6,7 @@ import app.softnetwork.elastic.sql.query.SQLQuery import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -//import scala.jdk.CollectionConverters._ -import scala.collection.JavaConverters._ +import scala.jdk.CollectionConverters._ /** Created by smanciot on 13/04/17. */ diff --git a/es7/rest/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi b/es7/rest/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi new file mode 100644 index 00000000..d24027cc --- /dev/null +++ b/es7/rest/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi @@ -0,0 +1 @@ +app.softnetwork.elastic.client.spi.RestHighLevelClientSpi \ No newline at end of file diff --git a/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala b/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala index 76b317dc..45bfa4bc 100644 --- a/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala +++ b/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientApi.scala @@ -18,30 +18,41 @@ package app.softnetwork.elastic.client.rest import akka.NotUsed import akka.actor.ActorSystem -import akka.stream.scaladsl.Flow +import akka.stream.scaladsl.{Flow, Source} import app.softnetwork.elastic.client._ +import app.softnetwork.elastic.client.bulk._ +import app.softnetwork.elastic.client.scroll._ import app.softnetwork.elastic.sql.bridge._ -import app.softnetwork.elastic.sql.query.{SQLQuery, SQLSearchRequest} -import app.softnetwork.elastic.{client, sql} -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.serialization.serialization +import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLSearchRequest} import com.google.gson.JsonParser +import org.apache.http.util.EntityUtils import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions +import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest -import org.elasticsearch.action.admin.indices.flush.FlushRequest +import org.elasticsearch.action.admin.indices.flush.{FlushRequest, FlushResponse} import org.elasticsearch.action.admin.indices.open.OpenIndexRequest -import org.elasticsearch.action.admin.indices.refresh.RefreshRequest -import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest +import org.elasticsearch.action.admin.indices.refresh.{RefreshRequest, RefreshResponse} +import org.elasticsearch.action.admin.indices.settings.get.{GetSettingsRequest, GetSettingsResponse} import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest -import org.elasticsearch.action.bulk.{BulkItemResponse, BulkRequest, BulkResponse} +import org.elasticsearch.action.bulk.{BulkRequest, BulkResponse} import org.elasticsearch.action.delete.{DeleteRequest, DeleteResponse} import org.elasticsearch.action.get.{GetRequest, GetResponse} import org.elasticsearch.action.index.{IndexRequest, IndexResponse} -import 
org.elasticsearch.action.search.{MultiSearchRequest, SearchRequest, SearchResponse} +import org.elasticsearch.action.search.{ + ClearScrollRequest, + ClosePointInTimeRequest, + MultiSearchRequest, + MultiSearchResponse, + OpenPointInTimeRequest, + SearchRequest, + SearchResponse, + SearchScrollRequest +} +import org.elasticsearch.action.support.master.AcknowledgedResponse import org.elasticsearch.action.update.{UpdateRequest, UpdateResponse} import org.elasticsearch.action.{ActionListener, DocWriteRequest} -import org.elasticsearch.client.{Request, RequestOptions} +import org.elasticsearch.client.{GetAliasesResponse, Request, RequestOptions} import org.elasticsearch.client.core.{CountRequest, CountResponse} import org.elasticsearch.client.indices.{ CloseIndexRequest, @@ -50,17 +61,17 @@ import org.elasticsearch.client.indices.{ GetMappingsRequest, PutMappingRequest } -import org.elasticsearch.common.io.stream.InputStreamStreamInput +import org.elasticsearch.common.Strings +import org.elasticsearch.core.TimeValue import org.elasticsearch.xcontent.{DeprecationHandler, XContentType} import org.elasticsearch.rest.RestStatus -import org.elasticsearch.search.aggregations.bucket.filter.Filter -import org.elasticsearch.search.aggregations.bucket.nested.Nested -import org.elasticsearch.search.aggregations.metrics.{Avg, Cardinality, Max, Min, Sum, ValueCount} -import org.elasticsearch.search.builder.SearchSourceBuilder -import org.json4s.Formats - -import java.io.ByteArrayInputStream -import scala.collection.JavaConverters.mapAsScalaMapConverter +import org.elasticsearch.search.builder.{PointInTimeBuilder, SearchSourceBuilder} +import org.elasticsearch.search.sort.{FieldSortBuilder, SortOrder} +import org.json4s.jackson.JsonMethods +import org.json4s.DefaultFormats + +import java.io.IOException +import scala.jdk.CollectionConverters._ import scala.concurrent.{ExecutionContext, Future, Promise} import scala.language.implicitConversions import scala.util.{Failure, Success, Try} @@ -74,826 +85,754 @@ trait RestHighLevelClientApi with RestHighLevelClientRefreshApi with RestHighLevelClientFlushApi with RestHighLevelClientCountApi - with RestHighLevelClientSingleValueAggregateApi with RestHighLevelClientIndexApi with RestHighLevelClientUpdateApi with RestHighLevelClientDeleteApi with RestHighLevelClientGetApi with RestHighLevelClientSearchApi with RestHighLevelClientBulkApi + with RestHighLevelClientScrollApi + with RestHighLevelClientCompanion + with RestHighLevelClientVersion -trait RestHighLevelClientIndicesApi extends IndicesApi with RestHighLevelClientCompanion { - override def createIndex(index: String, settings: String): Boolean = { - tryOrElse( - apply() - .indices() - .create( - new CreateIndexRequest(index) - .settings(settings, XContentType.JSON), - RequestOptions.DEFAULT - ) - .isAcknowledged, - false - )(logger) - } +/** Version API implementation for RestHighLevelClient + * @see + * [[VersionApi]] for generic API documentation + */ +trait RestHighLevelClientVersion extends VersionApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion with SerializationApi => - override def deleteIndex(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .delete(new DeleteIndexRequest(index), RequestOptions.DEFAULT) - .isAcknowledged, - false - )(logger) - } + override private[client] def executeVersion(): result.ElasticResult[String] = + executeRestLowLevelAction[String]( + operation = "version", + index = None, + retryable = true + )( + request = new Request("GET", "/") + )( + 
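// "GET /" returns cluster metadata as JSON; the transformer below extracts its version.number field. +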
transformer = resp => { + val jsonString = EntityUtils.toString(resp.getEntity) + implicit val formats: DefaultFormats.type = DefaultFormats + val json = JsonMethods.parse(jsonString) + (json \ "version" \ "number").extract[String] + } + ) - override def openIndex(index: String): Boolean = { - tryOrElse( - apply().indices().open(new OpenIndexRequest(index), RequestOptions.DEFAULT).isAcknowledged, - false - )(logger) - } +} - override def closeIndex(index: String): Boolean = { - tryOrElse( - apply().indices().close(new CloseIndexRequest(index), RequestOptions.DEFAULT).isAcknowledged, - false - )(logger) - } +/** Indices management API for RestHighLevelClient + * @see + * [[IndicesApi]] for generic API documentation + */ +trait RestHighLevelClientIndicesApi extends IndicesApi with RestHighLevelClientHelpers { + _: RefreshApi with RestHighLevelClientCompanion => - /** Reindex from source index to target index. - * - * @param sourceIndex - * - the name of the source index - * @param targetIndex - * - the name of the target index - * @param refresh - * - true to refresh the target index after reindexing, false otherwise - * @return - * true if the reindexing was successful, false otherwise - */ - override def reindex(sourceIndex: String, targetIndex: String, refresh: Boolean): Boolean = { - val request = new Request("POST", "/_reindex?refresh=true") - request.setJsonEntity( - s""" - |{ - | "source": { - | "index": "$sourceIndex" - | }, - | "dest": { - | "index": "$targetIndex" - | } - |} - """.stripMargin + override private[client] def executeCreateIndex( + index: String, + settings: String + ): result.ElasticResult[Boolean] = { + executeRestBooleanAction[CreateIndexRequest, AcknowledgedResponse]( + operation = "createIndex", + index = Some(index), + retryable = false + )( + request = new CreateIndexRequest(index).settings(settings, XContentType.JSON) + )( + executor = req => apply().indices().create(req, RequestOptions.DEFAULT) ) - tryOrElse( - apply().getLowLevelClient.performRequest(request).getStatusLine.getStatusCode < 400, - false - )(logger) } - /** Check if an index exists. 
- * - * @param index - * - the name of the index to check - * @return - * true if the index exists, false otherwise - */ - override def indexExists(index: String): Boolean = { - tryOrElse( - apply().indices().exists(new GetIndexRequest(index), RequestOptions.DEFAULT), - false - )(logger) - } + override private[client] def executeDeleteIndex(index: String): result.ElasticResult[Boolean] = + executeRestBooleanAction[DeleteIndexRequest, AcknowledgedResponse]( + operation = "deleteIndex", + index = Some(index), + retryable = false + )( + request = new DeleteIndexRequest(index) + )( + executor = req => apply().indices().delete(req, RequestOptions.DEFAULT) + ) -} + override private[client] def executeCloseIndex(index: String): result.ElasticResult[Boolean] = + executeRestBooleanAction[CloseIndexRequest, AcknowledgedResponse]( + operation = "closeIndex", + index = Some(index), + retryable = false + )( + request = new CloseIndexRequest(index) + )( + executor = req => apply().indices().close(req, RequestOptions.DEFAULT) + ) -trait RestHighLevelClientAliasApi extends AliasApi with RestHighLevelClientCompanion { - override def addAlias(index: String, alias: String): Boolean = { - tryOrElse( - apply() - .indices() - .updateAliases( - new IndicesAliasesRequest() - .addAliasAction( - new AliasActions(AliasActions.Type.ADD) - .index(index) - .alias(alias) - ), - RequestOptions.DEFAULT - ) - .isAcknowledged, - false - )(logger) - } + override private[client] def executeOpenIndex(index: String): result.ElasticResult[Boolean] = + executeRestBooleanAction[OpenIndexRequest, AcknowledgedResponse]( + operation = "openIndex", + index = Some(index), + retryable = false + )( + request = new OpenIndexRequest(index) + )( + executor = req => apply().indices().open(req, RequestOptions.DEFAULT) + ) - override def removeAlias(index: String, alias: String): Boolean = { - tryOrElse( - apply() - .indices() - .updateAliases( - new IndicesAliasesRequest() - .addAliasAction( - new AliasActions(AliasActions.Type.REMOVE) - .index(index) - .alias(alias) - ), - RequestOptions.DEFAULT + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): result.ElasticResult[(Boolean, Option[Long])] = + executeRestAction[Request, org.elasticsearch.client.Response, (Boolean, Option[Long])]( + operation = "reindex", + index = Some(s"$sourceIndex->$targetIndex"), + retryable = false + )( + request = { + val req = new Request("POST", s"/_reindex?refresh=$refresh") + req.setJsonEntity( + s""" + |{ + | "source": { + | "index": "$sourceIndex" + | }, + | "dest": { + | "index": "$targetIndex" + | } + |} + """.stripMargin ) - .isAcknowledged, - false - )(logger) - } -} + req + } + )( + executor = req => apply().getLowLevelClient.performRequest(req) + )(resp => { + resp.getStatusLine match { + case statusLine if statusLine.getStatusCode >= 400 => + (false, None) + case _ => + val json = new JsonParser() + .parse( + scala.io.Source.fromInputStream(resp.getEntity.getContent).mkString + ) + .getAsJsonObject + if (json.has("failures") && json.get("failures").getAsJsonArray.size() > 0) { + (false, None) + } else { + (true, Some(json.get("created").getAsLong)) + } + } + }) -trait RestHighLevelClientSettingsApi extends SettingsApi with RestHighLevelClientCompanion { - _: RestHighLevelClientIndicesApi => - - override def updateSettings(index: String, settings: String): Boolean = { - tryOrElse( - apply() - .indices() - .putSettings( - new UpdateSettingsRequest(index) - .settings(settings, 
XContentType.JSON), - RequestOptions.DEFAULT - ) - .isAcknowledged, - false - )(logger) - } + override private[client] def executeIndexExists(index: String): result.ElasticResult[Boolean] = + executeRestAction[GetIndexRequest, Boolean, Boolean]( + operation = "indexExists", + index = Some(index), + retryable = false + )( + request = new GetIndexRequest(index) + )( + executor = req => apply().indices().exists(req, RequestOptions.DEFAULT) + )( + identity + ) - override def loadSettings(index: String): String = { - tryOrElse( - { - new JsonParser() - .parse( - apply() - .indices() - .getSettings( - new GetSettingsRequest().indices(index), - RequestOptions.DEFAULT - ) - .toString - ) - .getAsJsonObject - .get(index) - .getAsJsonObject - .get("settings") - .getAsJsonObject - .get("index") - .getAsJsonObject - .toString - }, - "{}" - )(logger) - } } -trait RestHighLevelClientMappingApi extends MappingApi with RestHighLevelClientCompanion { - override def setMapping(index: String, mapping: String): Boolean = { - tryOrElse( - apply() - .indices() - .putMapping( - new PutMappingRequest(index) - .source(mapping, XContentType.JSON), - RequestOptions.DEFAULT +/** Alias management API for RestHighLevelClient + * @see + * [[AliasApi]] for generic API documentation + */ +trait RestHighLevelClientAliasApi extends AliasApi with RestHighLevelClientHelpers { + _: IndicesApi with RestHighLevelClientCompanion => + + override private[client] def executeAddAlias( + index: String, + alias: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "addAlias", + index = Some(index), + retryable = false + )( + request = new IndicesAliasesRequest() + .addAliasAction( + new AliasActions(AliasActions.Type.ADD) + .index(index) + .alias(alias) ) - .isAcknowledged, - false - )(logger) - } + )( + executor = req => apply().indices().updateAliases(req, RequestOptions.DEFAULT) + ) - override def getMapping(index: String): String = { - tryOrElse( - apply() - .indices() - .getMapping( - new GetMappingsRequest().indices(index), - RequestOptions.DEFAULT + override private[client] def executeRemoveAlias( + index: String, + alias: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "removeAlias", + index = Some(index), + retryable = false + )( + request = new IndicesAliasesRequest() + .addAliasAction( + new AliasActions(AliasActions.Type.REMOVE) + .index(index) + .alias(alias) ) - .mappings() - .asScala - .get(index) - .map(metadata => metadata.source().string()), - None - )(logger).getOrElse(s""""{$index: {"mappings": {}}}""") - } + )( + executor = req => apply().indices().updateAliases(req, RequestOptions.DEFAULT) + ) - override def getMappingProperties(index: String): String = { - tryOrElse( - getMapping(index), - "{\"properties\": {}}" - )(logger) - } + override private[client] def executeAliasExists(alias: String): result.ElasticResult[Boolean] = + executeRestAction[GetAliasesRequest, GetAliasesResponse, Boolean]( + operation = "aliasExists", + index = Some(alias), + retryable = true + )( + request = new GetAliasesRequest().aliases(alias) + )( + executor = req => apply().indices().getAlias(req, RequestOptions.DEFAULT) + )(response => !response.getAliases.isEmpty) -} + override private[client] def executeGetAliases(index: String): result.ElasticResult[String] = + executeRestAction[GetAliasesRequest, GetAliasesResponse, String]( + operation = "getAliases", + index = Some(index), + retryable = true + )( + request = new GetAliasesRequest().indices(index) + )( + executor 
= req => apply().indices().getAlias(req, RequestOptions.DEFAULT) + )(response => Strings.toString(response)) -trait RestHighLevelClientRefreshApi extends RefreshApi with RestHighLevelClientCompanion { - override def refresh(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .refresh( - new RefreshRequest(index), - RequestOptions.DEFAULT + override private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "swapAlias", + index = Some(s"$oldIndex -> $newIndex"), + retryable = false + )( + request = new IndicesAliasesRequest() + .addAliasAction( + new AliasActions(AliasActions.Type.REMOVE) + .index(oldIndex) + .alias(alias) ) - .getStatus - .getStatus < 400, - false - )(logger) - } + .addAliasAction( + new AliasActions(AliasActions.Type.ADD) + .index(newIndex) + .alias(alias) + ) + )( + executor = req => apply().indices().updateAliases(req, RequestOptions.DEFAULT) + ) } -trait RestHighLevelClientFlushApi extends FlushApi with RestHighLevelClientCompanion { - override def flush(index: String, force: Boolean = true, wait: Boolean = true): Boolean = { - tryOrElse( - apply() - .indices() - .flush( - new FlushRequest(index).force(force).waitIfOngoing(wait), - RequestOptions.DEFAULT - ) - .getStatus == RestStatus.OK, - false - )(logger) - } +/** Settings management API for RestHighLevelClient + * @see + * [[SettingsApi]] for generic API documentation + */ +trait RestHighLevelClientSettingsApi extends SettingsApi with RestHighLevelClientHelpers { + _: IndicesApi with RestHighLevelClientCompanion => + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "updateSettings", + index = Some(index), + retryable = false + )( + request = new UpdateSettingsRequest(index) + .settings(settings, XContentType.JSON) + )( + executor = req => apply().indices().putSettings(req, RequestOptions.DEFAULT) + ) + + override private[client] def executeLoadSettings(index: String): result.ElasticResult[String] = + executeRestAction[GetSettingsRequest, GetSettingsResponse, String]( + operation = "loadSettings", + index = Some(index), + retryable = true + )( + request = new GetSettingsRequest().indices(index) + )( + executor = req => apply().indices().getSettings(req, RequestOptions.DEFAULT) + )(response => response.toString) + } -trait RestHighLevelClientCountApi extends CountApi with RestHighLevelClientCompanion { - override def countAsync( - query: client.JSONQuery - )(implicit ec: ExecutionContext): Future[Option[Double]] = { - val promise = Promise[Option[Double]]() - apply().countAsync( - new CountRequest().indices(query.indices: _*).types(query.types: _*), - RequestOptions.DEFAULT, - new ActionListener[CountResponse] { - override def onResponse(response: CountResponse): Unit = - promise.success(Option(response.getCount.toDouble)) - - override def onFailure(e: Exception): Unit = promise.failure(e) - } +/** Mapping API implementation for RestHighLevelClient + * @see + * [[MappingApi]] for generic API documentation + */ +trait RestHighLevelClientMappingApi extends MappingApi with RestHighLevelClientHelpers { + _: SettingsApi with IndicesApi with RefreshApi with RestHighLevelClientCompanion => + + override private[client] def executeSetMapping( + index: String, + mapping: String + ): result.ElasticResult[Boolean] = + executeRestBooleanAction( + operation = "setMapping", + index = 
Some(index), + retryable = false + )( + request = new PutMappingRequest(index) + .source(mapping, XContentType.JSON) + )( + executor = req => apply().indices().putMapping(req, RequestOptions.DEFAULT) ) - promise.future - } - override def count(query: client.JSONQuery): Option[Double] = { - tryOrElse( - Option( - apply() - .count( - new CountRequest().indices(query.indices: _*).types(query.types: _*), - RequestOptions.DEFAULT - ) - .getCount - .toDouble - ), - None - )(logger) - } + override private[client] def executeGetMapping(index: String): result.ElasticResult[String] = + executeRestAction[ + GetMappingsRequest, + org.elasticsearch.client.indices.GetMappingsResponse, + String + ]( + operation = "getMapping", + index = Some(index), + retryable = true + )( + request = new GetMappingsRequest().indices(index) + )( + executor = req => apply().indices().getMapping(req, RequestOptions.DEFAULT) + )(response => { + val mappings = response.mappings().asScala.get(index) + mappings match { + case Some(metadata) => metadata.source().toString + case None => s"""{"properties": {}}""" + } + }) + } -trait RestHighLevelClientSingleValueAggregateApi - extends SingleValueAggregateApi - with RestHighLevelClientCountApi { - override def aggregate( - sqlQuery: SQLQuery - )(implicit ec: ExecutionContext): Future[Seq[SingleValueAggregateResult]] = { - val aggregations: Seq[ElasticAggregation] = sqlQuery - val futures = for (aggregation <- aggregations) yield { - val promise: Promise[SingleValueAggregateResult] = Promise() - val field = aggregation.field - val sourceField = aggregation.sourceField - val aggType = aggregation.aggType - val aggName = aggregation.aggName - val query = aggregation.query.getOrElse("") - val sources = aggregation.sources - sourceField match { - case "_id" if aggType.sql == "count" => - countAsync( - JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ) - ).onComplete { - case Success(result) => - promise.success( - SingleValueAggregateResult( - field, - aggType, - result.map(r => NumericValue(r.doubleValue())).getOrElse(EmptyValue), - None - ) - ) - case Failure(f) => - logger.error(f.getMessage, f.fillInStackTrace()) - promise.success( - SingleValueAggregateResult(field, aggType, EmptyValue, Some(f.getMessage)) - ) - } - promise.future - case _ => - val jsonQuery = JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ) - import jsonQuery._ - // Create a parser for the query - val xContentParser = XContentType.JSON - .xContent() - .createParser( - namedXContentRegistry, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - jsonQuery.query - ) - apply().searchAsync( - new SearchRequest(indices: _*) - .types(types: _*) - .source( - SearchSourceBuilder.fromXContent(xContentParser) - ), - RequestOptions.DEFAULT, - new ActionListener[SearchResponse] { - override def onResponse(response: SearchResponse): Unit = { - val agg = aggName.split("\\.").last - - val itAgg = aggName.split("\\.").iterator - - var root = - if (aggregation.nested) { - response.getAggregations.get(itAgg.next()).asInstanceOf[Nested].getAggregations - } else { - response.getAggregations - } +/** Refresh API implementation for RestHighLevelClient + * @see + * [[RefreshApi]] for generic API documentation + */ +trait RestHighLevelClientRefreshApi extends RefreshApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion => - if (aggregation.filtered) { - root = 
root.get(itAgg.next()).asInstanceOf[Filter].getAggregations - } + override private[client] def executeRefresh(index: String): result.ElasticResult[Boolean] = + executeRestAction[RefreshRequest, RefreshResponse, Boolean]( + operation = "refresh", + index = Some(index), + retryable = true + )( + request = new RefreshRequest(index) + )( + executor = req => apply().indices().refresh(req, RequestOptions.DEFAULT) + )(response => response.getStatus.getStatus < 400) - promise.success( - SingleValueAggregateResult( - field, - aggType, - aggType match { - case sql.function.aggregate.COUNT => - if (aggregation.distinct) { - NumericValue(root.get(agg).asInstanceOf[Cardinality].value()) - } else { - NumericValue(root.get(agg).asInstanceOf[ValueCount].value()) - } - case sql.function.aggregate.SUM => - NumericValue(root.get(agg).asInstanceOf[Sum].value()) - case sql.function.aggregate.AVG => - NumericValue(root.get(agg).asInstanceOf[Avg].value()) - case sql.function.aggregate.MIN => - NumericValue(root.get(agg).asInstanceOf[Min].value()) - case sql.function.aggregate.MAX => - NumericValue(root.get(agg).asInstanceOf[Max].value()) - case _ => EmptyValue - }, - None - ) - ) - } +} + +/** Flush API implementation for RestHighLevelClient + * @see + * [[FlushApi]] for generic API documentation + */ +trait RestHighLevelClientFlushApi extends FlushApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion => + override private[client] def executeFlush( + index: String, + force: Boolean, + wait: Boolean + ): result.ElasticResult[Boolean] = + executeRestAction[FlushRequest, FlushResponse, Boolean]( + operation = "flush", + index = Some(index), + retryable = true + )( + request = new FlushRequest(index).force(force).waitIfOngoing(wait) + )( + executor = req => apply().indices().flush(req, RequestOptions.DEFAULT) + )(response => response.getStatus == RestStatus.OK) - override def onFailure(e: Exception): Unit = promise.failure(e) - } - ) - promise.future - } - } - Future.sequence(futures) - } } -trait RestHighLevelClientIndexApi extends IndexApi with RestHighLevelClientCompanion { - _: RestHighLevelClientRefreshApi => - override def index(index: String, id: String, source: String): Boolean = { - tryOrElse( - apply() - .index( - new IndexRequest(index) - .id(id) - .source(source, XContentType.JSON), - RequestOptions.DEFAULT - ) - .status() - .getStatus < 400, - false - )(logger) +/** Count API implementation for RestHighLevelClient + * @see + * [[CountApi]] for generic API documentation + */ +trait RestHighLevelClientCountApi extends CountApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion => + override private[client] def executeCount( + query: ElasticQuery + ): result.ElasticResult[Option[Double]] = + executeRestAction[CountRequest, CountResponse, Option[Double]]( + operation = "count", + index = Some(query.indices.mkString(",")), + retryable = true + )( + request = new CountRequest().indices(query.indices: _*).types(query.types: _*) + )( + executor = req => apply().count(req, RequestOptions.DEFAULT) + )(response => Option(response.getCount.toDouble)) + + override private[client] def executeCountAsync( + query: ElasticQuery + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[Double]]] = { + executeAsyncRestAction[CountRequest, CountResponse, Option[Double]]( + operation = "countAsync", + index = Some(query.indices.mkString(",")), + retryable = true + )( + request = new CountRequest().indices(query.indices: _*).types(query.types: _*) + )( + executor = (req, 
listener) => apply().countAsync(req, RequestOptions.DEFAULT, listener) + )(response => Option(response.getCount.toDouble)) } - override def indexAsync(index: String, id: String, source: String)(implicit +} + +/** Index API implementation for RestHighLevelClient + * @see + * [[IndexApi]] for generic API documentation + */ +trait RestHighLevelClientIndexApi extends IndexApi with RestHighLevelClientHelpers { + _: RefreshApi with RestHighLevelClientCompanion with SerializationApi => + override private[client] def executeIndex( + index: String, + id: String, + source: String + ): result.ElasticResult[Boolean] = + executeRestAction[IndexRequest, IndexResponse, Boolean]( + operation = "index", + index = Some(index), + retryable = false + )( + request = new IndexRequest(index) + .`type`("_doc") + .id(id) + .source(source, XContentType.JSON) + )( + executor = req => apply().index(req, RequestOptions.DEFAULT) + )(response => response.status().getStatus < 400) + + override private[client] def executeIndexAsync(index: String, id: String, source: String)(implicit ec: ExecutionContext - ): Future[Boolean] = { - val promise: Promise[Boolean] = Promise() - apply().indexAsync( - new IndexRequest(index) + ): Future[result.ElasticResult[Boolean]] = + executeAsyncRestAction[IndexRequest, IndexResponse, Boolean]( + operation = "indexAsync", + index = Some(index), + retryable = false + )( + request = new IndexRequest(index) + .`type`("_doc") .id(id) - .source(source, XContentType.JSON), - RequestOptions.DEFAULT, - new ActionListener[IndexResponse] { - override def onResponse(response: IndexResponse): Unit = - promise.success(response.status().getStatus < 400) + .source(source, XContentType.JSON) + )( + executor = (req, listener) => apply().indexAsync(req, RequestOptions.DEFAULT, listener) + )(response => response.status().getStatus < 400) - override def onFailure(e: Exception): Unit = promise.failure(e) - } - ) - promise.future - } } -trait RestHighLevelClientUpdateApi extends UpdateApi with RestHighLevelClientCompanion { - _: RestHighLevelClientRefreshApi => - override def update( +/** Update API implementation for RestHighLevelClient + * @see + * [[UpdateApi]] for generic API documentation + */ +trait RestHighLevelClientUpdateApi extends UpdateApi with RestHighLevelClientHelpers { + _: RefreshApi with RestHighLevelClientCompanion with SerializationApi => + override private[client] def executeUpdate( index: String, id: String, source: String, upsert: Boolean - ): Boolean = { - tryOrElse( - apply() - .update( - new UpdateRequest(index, id) - .doc(source, XContentType.JSON) - .docAsUpsert(upsert), - RequestOptions.DEFAULT - ) - .status() - .getStatus < 400, - false - )(logger) - } + ): result.ElasticResult[Boolean] = + executeRestAction[UpdateRequest, UpdateResponse, Boolean]( + operation = "update", + index = Some(index), + retryable = false + )( + request = new UpdateRequest(index, id) + .doc(source, XContentType.JSON) + .docAsUpsert(upsert) + )( + executor = req => apply().update(req, RequestOptions.DEFAULT) + )(response => response.status().getStatus < 400) - override def updateAsync( + override private[client] def executeUpdateAsync( index: String, id: String, source: String, upsert: Boolean - )(implicit ec: ExecutionContext): Future[Boolean] = { - val promise: Promise[Boolean] = Promise() - apply().updateAsync( - new UpdateRequest(index, id) + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Boolean]] = + executeAsyncRestAction[UpdateRequest, UpdateResponse, Boolean]( + operation = 
"updateAsync", + index = Some(index), + retryable = false + )( + request = new UpdateRequest(index, id) .doc(source, XContentType.JSON) - .docAsUpsert(upsert), - RequestOptions.DEFAULT, - new ActionListener[UpdateResponse] { - override def onResponse(response: UpdateResponse): Unit = - promise.success(response.status().getStatus < 400) + .docAsUpsert(upsert) + )( + executor = (req, listener) => apply().updateAsync(req, RequestOptions.DEFAULT, listener) + )(response => response.status().getStatus < 400) - override def onFailure(e: Exception): Unit = promise.failure(e) - } - ) - promise.future - } } -trait RestHighLevelClientDeleteApi extends DeleteApi with RestHighLevelClientCompanion { - _: RestHighLevelClientRefreshApi => +/** Delete API implementation for RestHighLevelClient + * @see + * [[DeleteApi]] for generic API documentation + */ +trait RestHighLevelClientDeleteApi extends DeleteApi with RestHighLevelClientHelpers { + _: RefreshApi with RestHighLevelClientCompanion => - override def delete(uuid: String, index: String): Boolean = { - tryOrElse( - apply() - .delete( - new DeleteRequest(index, uuid), - RequestOptions.DEFAULT - ) - .status() - .getStatus < 400, - false - )(logger) - } + override private[client] def executeDelete( + index: String, + id: String + ): result.ElasticResult[Boolean] = + executeRestAction[DeleteRequest, DeleteResponse, Boolean]( + operation = "delete", + index = Some(index), + retryable = false + )( + request = new DeleteRequest(index, id) + )( + executor = req => apply().delete(req, RequestOptions.DEFAULT) + )(response => response.status().getStatus < 400) - override def deleteAsync(uuid: String, index: String)(implicit + override private[client] def executeDeleteAsync(index: String, id: String)(implicit ec: ExecutionContext - ): Future[Boolean] = { - val promise: Promise[Boolean] = Promise() - apply().deleteAsync( - new DeleteRequest(index, uuid), - RequestOptions.DEFAULT, - new ActionListener[DeleteResponse] { - override def onResponse(response: DeleteResponse): Unit = - promise.success(response.status().getStatus < 400) - - override def onFailure(e: Exception): Unit = promise.failure(e) - } - ) - promise.future - } + ): Future[result.ElasticResult[Boolean]] = + executeAsyncRestAction[DeleteRequest, DeleteResponse, Boolean]( + operation = "deleteAsync", + index = Some(index), + retryable = false + )( + request = new DeleteRequest(index, id) + )( + executor = (req, listener) => apply().deleteAsync(req, RequestOptions.DEFAULT, listener) + )(response => response.status().getStatus < 400) + } -trait RestHighLevelClientGetApi extends GetApi with RestHighLevelClientCompanion { - def get[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], formats: Formats): Option[U] = { - Try( - apply().get( - new GetRequest( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ), - id - ), - RequestOptions.DEFAULT - ) - ) match { - case Success(response) => - if (response.isExists) { - val source = response.getSourceAsString - logger.info(s"Deserializing response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}") - // Deserialize the source string to the expected type - // Note: This assumes that the source is a valid JSON representation of U - // and that the serialization library is capable of handling it. 
- Try(serialization.read[U](source)) match { - case Success(value) => Some(value) - case Failure(f) => - logger.error( - s"Failed to deserialize response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) - None - } - } else { - None - } - case Failure(f) => - logger.error( - s"Failed to get document with id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) +/** Get API implementation for RestHighLevelClient + * @see + * [[GetApi]] for generic API documentation + */ +trait RestHighLevelClientGetApi extends GetApi with RestHighLevelClientHelpers { + _: RestHighLevelClientCompanion with SerializationApi => + override private[client] def executeGet( + index: String, + id: String + ): result.ElasticResult[Option[String]] = + executeRestAction[GetRequest, GetResponse, Option[String]]( + operation = "get", + index = Some(index), + retryable = true + )( + request = new GetRequest(index, id) + )( + executor = req => apply().get(req, RequestOptions.DEFAULT) + )(response => { + if (response.isExists) { + Some(response.getSourceAsString) + } else { None - } - } - - override def getAsync[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[Option[U]] = { - val promise = Promise[Option[U]]() - apply().getAsync( - new GetRequest( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ), - id - ), - RequestOptions.DEFAULT, - new ActionListener[GetResponse] { - override def onResponse(response: GetResponse): Unit = { - if (response.isExists) { - promise.success(Some(serialization.read[U](response.getSourceAsString))) - } else { - promise.success(None) - } - } + } + }) - override def onFailure(e: Exception): Unit = promise.failure(e) + override private[client] def executeGetAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[result.ElasticResult[Option[String]]] = + executeAsyncRestAction[GetRequest, GetResponse, Option[String]]( + operation = "getAsync", + index = Some(index), + retryable = true + )( + request = new GetRequest(index, id) + )( + executor = (req, listener) => apply().getAsync(req, RequestOptions.DEFAULT, listener) + )(response => { + if (response.isExists) { + Some(response.getSourceAsString) + } else { + None } - ) - promise.future - } + }) + } -trait RestHighLevelClientSearchApi extends SearchApi with RestHighLevelClientCompanion { +/** Search API implementation for RestHighLevelClient + * @see + * [[SearchApi]] for generic API documentation + */ +trait RestHighLevelClientSearchApi extends SearchApi with RestHighLevelClientHelpers { + _: ElasticConversion with RestHighLevelClientCompanion with SerializationApi => + override implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = implicitly[ElasticSearchRequest](sqlSearch).query - override def search[U]( - jsonQuery: JSONQuery - )(implicit m: Manifest[U], formats: Formats): List[U] = { - import jsonQuery._ - logger.info(s"Searching with query: $query on indices: ${indices.mkString(", ")}") - // Create a parser for the query - val xContentParser = XContentType.JSON - .xContent() - .createParser( - namedXContentRegistry, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - query - ) - val response = apply().search( - new SearchRequest(indices: _*) - .types(types: _*) - .source( - SearchSourceBuilder.fromXContent(xContentParser) - ), - 
RequestOptions.DEFAULT - ) - if (response.getHits.getTotalHits.value > 0) { - response.getHits.getHits.toList.map { hit => - logger.info(s"Deserializing hit: ${hit.getSourceAsString}") - serialization.read[U](hit.getSourceAsString) + override private[client] def executeSingleSearch( + elasticQuery: ElasticQuery + ): result.ElasticResult[Option[String]] = + executeRestAction[SearchRequest, SearchResponse, Option[String]]( + operation = "singleSearch", + index = Some(elasticQuery.indices.mkString(",")), + retryable = true + )( + request = { + val req = new SearchRequest(elasticQuery.indices: _*).types(elasticQuery.types: _*) + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + elasticQuery.query + ) + req.source(SearchSourceBuilder.fromXContent(xContentParser)) + req } - } else { - List.empty[U] - } - } + )( + executor = req => apply().search(req, RequestOptions.DEFAULT) + )(response => { + if (response.status() == RestStatus.OK) { + Some(Strings.toString(response)) + } else { + None + } + }) - override def searchAsync[U]( - sqlQuery: SQLQuery - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[List[U]] = { - val jsonQuery: JSONQuery = sqlQuery - import jsonQuery._ - val promise = Promise[List[U]]() - logger.info(s"Searching with query: $query on indices: ${indices.mkString(", ")}") - // Create a parser for the query - val xContentParser = XContentType.JSON - .xContent() - .createParser( - namedXContentRegistry, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - query - ) - // Execute the search asynchronously - apply().searchAsync( - new SearchRequest(indices: _*) - .types(types: _*) - .source( - SearchSourceBuilder.fromXContent(xContentParser) - ), - RequestOptions.DEFAULT, - new ActionListener[SearchResponse] { - override def onResponse(response: SearchResponse): Unit = { - if (response.getHits.getTotalHits.value > 0) { - promise.success(response.getHits.getHits.toList.map { hit => - serialization.read[U](hit.getSourceAsString) - }) - } else { - promise.success(List.empty[U]) - } + override private[client] def executeMultiSearch( + elasticQueries: ElasticQueries + ): result.ElasticResult[Option[String]] = + executeRestAction[MultiSearchRequest, MultiSearchResponse, Option[String]]( + operation = "multiSearch", + index = Some( + elasticQueries.queries + .flatMap(_.indices) + .distinct + .mkString(",") + ), + retryable = true + )( + request = { + val req = new MultiSearchRequest() + for (query <- elasticQueries.queries) { + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + query.query + ) + val searchSourceBuilder = SearchSourceBuilder.fromXContent(xContentParser) + req.add( + new SearchRequest(query.indices: _*) + .types(query.types: _*) + .source(searchSourceBuilder) + ) } - - override def onFailure(e: Exception): Unit = promise.failure(e) + req } - ) - promise.future - } - - override def searchWithInnerHits[U, I](jsonQuery: JSONQuery, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[(U, List[I])] = { - import jsonQuery._ - // Create a parser for the query - val xContentParser = XContentType.JSON - .xContent() - .createParser( - namedXContentRegistry, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - jsonQuery.query - ) - val response = apply().search( - new SearchRequest(indices: _*) - .types(types: _*) - .source( - 
SearchSourceBuilder.fromXContent(xContentParser) - ), - RequestOptions.DEFAULT - ) - Try(new JsonParser().parse(response.toString).getAsJsonObject ~> [U, I] innerField) match { - case Success(s) => s - case Failure(f) => - logger.error(f.getMessage, f) - List.empty - } - } + )( + executor = req => apply().msearch(req, RequestOptions.DEFAULT) + )(response => Some(Strings.toString(response))) - override def multiSearch[U]( - jsonQueries: JSONQueries - )(implicit m: Manifest[U], formats: Formats): List[List[U]] = { - import jsonQueries._ - val request = new MultiSearchRequest() - for (query <- queries) { - request.add( - new SearchRequest(query.indices: _*) - .types(query.types: _*) - .source( - new SearchSourceBuilder( - new InputStreamStreamInput( - new ByteArrayInputStream( - query.query.getBytes() - ) - ) - ) + override private[client] def executeSingleSearchAsync( + elasticQuery: ElasticQuery + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[String]]] = + executeAsyncRestAction[SearchRequest, SearchResponse, Option[String]]( + operation = "executeSingleSearchAsync", + index = Some(elasticQuery.indices.mkString(",")), + retryable = true + )( + request = { + val req = new SearchRequest(elasticQuery.indices: _*).types(elasticQuery.types: _*) + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + elasticQuery.query ) - ) - } - val responses = apply().msearch(request, RequestOptions.DEFAULT) - responses.getResponses.toList.map { response => - if (response.isFailure) { - logger.error(s"Error in multi search: ${response.getFailureMessage}") - List.empty[U] + req.source(SearchSourceBuilder.fromXContent(xContentParser)) + req + } + )( + executor = (req, listener) => apply().searchAsync(req, RequestOptions.DEFAULT, listener) + )(response => { + if (response.status() == RestStatus.OK) { + Some(Strings.toString(response)) } else { - response.getResponse.getHits.getHits.toList.map { hit => - serialization.read[U](hit.getSourceAsString) - } + None } - } - } + }) - override def multiSearchWithInnerHits[U, I](jsonQueries: JSONQueries, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[List[(U, List[I])]] = { - import jsonQueries._ - val request = new MultiSearchRequest() - for (query <- queries) { - request.add( - new SearchRequest(query.indices: _*) - .types(query.types: _*) - .source( - new SearchSourceBuilder( - new InputStreamStreamInput( - new ByteArrayInputStream( - query.query.getBytes() - ) - ) + override private[client] def executeMultiSearchAsync( + elasticQueries: ElasticQueries + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[String]]] = + executeAsyncRestAction[MultiSearchRequest, MultiSearchResponse, Option[String]]( + operation = "executeMultiSearchAsync", + index = Some( + elasticQueries.queries + .flatMap(_.indices) + .distinct + .mkString(",") + ), + retryable = true + )( + request = { + val req = new MultiSearchRequest() + for (query <- elasticQueries.queries) { + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + query.query ) + val searchSourceBuilder = SearchSourceBuilder.fromXContent(xContentParser) + req.add( + new SearchRequest(query.indices: _*) + .types(query.types: _*) + .source(searchSourceBuilder) ) - ) - } - val responses = apply().msearch(request, RequestOptions.DEFAULT) - 
responses.getResponses.toList.map { response => - if (response.isFailure) { - logger.error(s"Error in multi search: ${response.getFailureMessage}") - List.empty[(U, List[I])] - } else { - Try( - new JsonParser().parse(response.getResponse.toString).getAsJsonObject ~> [U, I] innerField - ) match { - case Success(s) => s - case Failure(f) => - logger.error(f.getMessage, f) - List.empty } + req } - } - } + )( + executor = (req, listener) => apply().msearchAsync(req, RequestOptions.DEFAULT, listener) + )(response => Some(Strings.toString(response))) } -trait RestHighLevelClientBulkApi - extends RestHighLevelClientRefreshApi - with RestHighLevelClientSettingsApi - with RestHighLevelClientIndicesApi - with BulkApi { - override type A = DocWriteRequest[_] - override type R = BulkResponse +/** Bulk API implementation for RestHighLevelClient + * @see + * [[BulkApi]] for generic API documentation + */ +trait RestHighLevelClientBulkApi extends BulkApi with RestHighLevelClientHelpers { + _: RefreshApi with SettingsApi with IndexApi with RestHighLevelClientCompanion => - override def toBulkAction(bulkItem: BulkItem): A = { - import bulkItem._ - val request = action match { - case BulkAction.UPDATE => - new UpdateRequest(index, id.orNull) - .doc(body, XContentType.JSON) - .docAsUpsert(true) - case BulkAction.DELETE => - new DeleteRequest(index).id(id.getOrElse("_all")) - case _ => - new IndexRequest(index).source(body, XContentType.JSON).id(id.orNull) + override type BulkActionType = DocWriteRequest[_] + override type BulkResultType = BulkResponse + + override implicit def toBulkElasticAction(a: BulkActionType): BulkElasticAction = { + new BulkElasticAction { + override def index: String = a.index } - request } - override def bulkResult: Flow[R, Set[String], NotUsed] = - Flow[BulkResponse] - .named("result") - .map(result => { - val items = result.getItems - val grouped = items.groupBy(_.getIndex) - val indices = grouped.keys.toSet - for (index <- indices) { - logger - .info(s"Bulk operation succeeded for index $index with ${grouped(index).length} items.") - } - indices - }) - - override def bulk(implicit + /** Basic flow for executing a bulk action. This method must be implemented by concrete classes + * depending on the Elasticsearch version and client used. + * + * @param bulkOptions + * configuration options + * @return + * Flow transforming bulk actions into results + */ + override private[client] def bulkFlow(implicit bulkOptions: BulkOptions, system: ActorSystem - ): Flow[Seq[A], R, NotUsed] = { + ): Flow[Seq[BulkActionType], BulkResultType, NotUsed] = { val parallelism = Math.max(1, bulkOptions.balance) - Flow[Seq[A]] + Flow[Seq[BulkActionType]] .named("bulk") .mapAsyncUnordered[R](parallelism) { items => - val request = new BulkRequest(bulkOptions.index) + val request = new BulkRequest(bulkOptions.defaultIndex) items.foreach(request.add) val promise: Promise[R] = Promise[R]() apply().bulkAsync( @@ -919,21 +858,624 @@ trait RestHighLevelClientBulkApi } } - private[this] def toBulkElasticResultItem(i: BulkItemResponse): BulkElasticResultItem = - new BulkElasticResultItem { - override def index: String = i.getIndex + /** Convert a BulkResultType into individual results. This method must extract the successes and + * failures from the ES response. 
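
`bulkFlow` above turns batches of `DocWriteRequest`s into bulk requests executed with bounded parallelism. A simplified Akka Streams sketch of that wiring (strings stand in for bulk actions, and the ready `Future` stands in for the real `bulkAsync` call):

```scala
import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream.scaladsl.{Flow, Sink, Source}
import scala.concurrent.Future

implicit val system: ActorSystem = ActorSystem("bulk-sketch")

// One element in = one batch; one element out = one bulk "response" per batch.
val sketchFlow: Flow[Seq[String], Int, NotUsed] =
  Flow[Seq[String]].mapAsyncUnordered(4) { batch =>
    Future.successful(batch.size) // the real flow calls apply().bulkAsync(...) here
  }

Source(List("doc1", "doc2", "doc3", "doc4", "doc5"))
  .grouped(2) // upstream batching produces the Seq[BulkActionType] input
  .via(sketchFlow)
  .runWith(Sink.foreach(n => println(s"flushed a bulk request of $n actions")))
```

`mapAsyncUnordered` keeps at most `balance` bulk requests in flight while preserving backpressure, which is why the order of batch results is not guaranteed.
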
+ * + * @param result + * raw result from the bulk + * @return + * sequence of Right(id) for success or Left(failed) for failure + */ + override private[client] def extractBulkResults( + result: BulkResultType, + originalBatch: Seq[BulkItem] + ): Seq[Either[FailedDocument, SuccessfulDocument]] = { + // no results at all + if ( + originalBatch.nonEmpty && + (result == null || (result.getItems == null || result.getItems.isEmpty)) + ) { + logger.error("Bulk result is null or has no items") + return originalBatch.map { item => + Left( + FailedDocument( + id = item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = BulkError( + message = "Null bulk result", + `type` = "internal_error", + status = 500 + ), + retryable = false + ) + ) + } } - override implicit def toBulkElasticAction(a: DocWriteRequest[_]): BulkElasticAction = { - new BulkElasticAction { - override def index: String = a.index + // process failed items + val failedItems = result.getItems.filter(_.isFailed).map { item => + val failure = item.getFailure + val statusCode = item.status().getStatus + val errorType = Option(failure.getType).getOrElse("unknown") + val errorReason = Option(failure.getMessage).getOrElse("Unknown error") + + val itemId = item.getId + val itemIndex = item.getIndex + + val originalItemOpt = originalBatch + .find(o => o.id.contains(itemId) && o.index == itemIndex) + + // Determine if the error is retryable + val isRetryable = originalItemOpt.isDefined && (BulkErrorAnalyzer.isRetryable(statusCode) || + BulkErrorAnalyzer.isRetryableByType(errorType)) + + val originalItem = originalItemOpt.getOrElse( + BulkItem( + index = itemIndex, + id = Some(itemId), + document = "", + parent = None, + action = item.getOpType match { + case DocWriteRequest.OpType.INDEX => BulkAction.INDEX + case DocWriteRequest.OpType.CREATE => BulkAction.INDEX + case DocWriteRequest.OpType.UPDATE => BulkAction.UPDATE + case DocWriteRequest.OpType.DELETE => BulkAction.DELETE + } + ) + ) + + Left( + FailedDocument( + id = originalItem.id.getOrElse("unknown"), + index = originalItem.index, + document = originalItem.document, + error = BulkError( + message = errorReason, + `type` = errorType, + status = statusCode + ), + retryable = isRetryable + ) + ) + } + + // process successful items + val successfulItems = + result.getItems.filterNot(_.isFailed).map { item => + Right(SuccessfulDocument(id = item.getId, index = item.getIndex)) + } + + val results = failedItems ++ successfulItems + + // if no individual results but overall failure, mark all as failed + if (results.isEmpty && originalBatch.nonEmpty) { + val statusCode = result.status().getStatus + val errorString = result.buildFailureMessage() + logger.error(s"Bulk operation completed with errors: $errorString") + val bulkError = + BulkError( + message = errorString, + `type` = "unknown", + status = statusCode + ) + return originalBatch.map { item => + Left( + FailedDocument( + id = item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = bulkError, + retryable = BulkErrorAnalyzer.isRetryable(statusCode) + ) + ) + } + } + + results + } + + override def toBulkAction(bulkItem: BulkItem): A = { + import bulkItem._ + val request = action match { + case BulkAction.UPDATE => + new UpdateRequest(bulkItem.index, id.orNull) + .doc(document, XContentType.JSON) + .docAsUpsert(true) + case BulkAction.DELETE => + new DeleteRequest(bulkItem.index).id(id.getOrElse("_all")) + case _ => + new IndexRequest(bulkItem.index).source(document, 
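
`extractBulkResults` normalizes every bulk outcome to `Either[FailedDocument, SuccessfulDocument]`. A small sketch of how a caller might split those results to re-submit only the retryable failures (the two case classes are simplified stand-ins for the PR's types, which also carry the document body and a `BulkError`):

```scala
// Simplified stand-ins for the PR's result types.
final case class SuccessfulDocument(id: String, index: String)
final case class FailedDocument(id: String, index: String, retryable: Boolean)

def splitForRetry(
  results: Seq[Either[FailedDocument, SuccessfulDocument]]
): (Seq[SuccessfulDocument], Seq[FailedDocument], Seq[FailedDocument]) = {
  val successes         = results.collect { case Right(ok) => ok }
  val (retryable, dead) = results.collect { case Left(ko) => ko }.partition(_.retryable)
  (successes, retryable, dead) // retryable failures can be fed back into bulkFlow
}
```
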
XContentType.JSON).id(id.orNull) + } + request + } + + /** Conversion BulkActionType -> BulkItem */ + override private[client] def actionToBulkItem(action: BulkActionType): BulkItem = { + action match { + case req: IndexRequest => + BulkItem( + index = req.index(), + id = Option(req.id()), + document = req.source().utf8ToString(), + parent = None, + action = BulkAction.INDEX + ) + case req: UpdateRequest => + BulkItem( + index = req.index(), + id = Option(req.id()), + document = req.doc().source().utf8ToString(), + parent = None, + action = BulkAction.UPDATE + ) + case req: DeleteRequest => + BulkItem( + index = req.index(), + id = Option(req.id()), + document = "", + parent = None, + action = BulkAction.DELETE + ) + case _ => + throw new IllegalArgumentException( + s"Unsupported BulkActionType: ${action.getClass.getName}" + ) + } + } + +} + +/** Scroll API implementation for RestHighLevelClient + * @see + * [[ScrollApi]] for generic API documentation + */ +trait RestHighLevelClientScrollApi extends ScrollApi with RestHighLevelClientHelpers { + _: SearchApi with VersionApi with RestHighLevelClientCompanion => + + /** Classic scroll (works for both hits and aggregations) + */ + override private[client] def scrollClassic( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation], + config: ScrollConfig + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + Source + .unfoldAsync[Option[String], Seq[Map[String, Any]]](None) { scrollIdOpt => + retryWithBackoff(config.retryConfig) { + Future { + scrollIdOpt match { + case None => + // Initial search with scroll + logger.info( + s"Starting classic scroll on indices: ${elasticQuery.indices.mkString(", ")}" + ) + + val query = elasticQuery.query + // Create a parser for the query + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + query + ) + // Execute the search + val searchRequest = + new SearchRequest(elasticQuery.indices: _*) + .types(elasticQuery.types: _*) + .source( + SearchSourceBuilder.fromXContent(xContentParser).size(config.scrollSize) + ) + + searchRequest.scroll( + TimeValue.parseTimeValue(config.keepAlive, "scroll_timeout") + ) + + val response = apply().search(searchRequest, RequestOptions.DEFAULT) + + if (response.status() != RestStatus.OK) { + throw new IOException(s"Initial scroll failed with status: ${response.status()}") + } + + val scrollId = response.getScrollId + + if (scrollId == null) { + throw new IllegalStateException("Scroll ID is null in response") + } + + // Extract both hits AND aggregations + val results = extractAllResults(response, fieldAliases, aggregations) + + logger.info(s"Initial scroll returned ${results.size} results, scrollId: $scrollId") + + if (results.isEmpty) { + None + } else { + Some((Some(scrollId), results)) + } + + case Some(scrollId) => + // Subsequent scroll requests + logger.debug(s"Fetching next scroll batch (scrollId: $scrollId)") + + val scrollRequest = new SearchScrollRequest(scrollId) + scrollRequest.scroll( + TimeValue.parseTimeValue(config.keepAlive, "scroll_timeout") + ) + + val result = apply().scroll(scrollRequest, RequestOptions.DEFAULT) + + if (result.status() != RestStatus.OK) { + clearScroll(scrollId) + throw new IOException( + s"Scroll continuation failed with status: ${result.status()}" + ) + } + + val newScrollId = result.getScrollId + val results = 
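
The scroll strategies in this trait (`scrollClassic`, `searchAfter`, `pitSearchAfter`) all materialize results as a `Source[Map[String, Any], NotUsed]`, so callers process hits incrementally instead of loading them all at once. A sketch of consuming such a source (the two-row source below is a stand-in for a real scroll):

```scala
import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream.scaladsl.Source

implicit val system: ActorSystem = ActorSystem("scroll-sketch")
import system.dispatcher

// Stand-in for the Source a scroll strategy returns.
val rows: Source[Map[String, Any], NotUsed] =
  Source(List(Map("id" -> 1, "name" -> "a"), Map("id" -> 2, "name" -> "b")))

// Rows are pulled page by page under backpressure, so memory stays bounded
// no matter how many hits the query matches.
rows.runFold(0)((count, _) => count + 1).foreach(n => println(s"streamed $n rows"))
```
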
extractAllResults(result, fieldAliases, aggregations) + + logger.debug(s"Scroll returned ${results.size} results") + + if (results.isEmpty) { + clearScroll(scrollId) + None + } else { + Some((Some(newScrollId), results)) + } + } + } + }(system, logger).recover { case ex: Exception => + logger.error(s"Scroll failed after retries: ${ex.getMessage}", ex) + scrollIdOpt.foreach(clearScroll) + None + } + } + .mapConcat(identity) + } + + /** Search After (only for hits, more efficient) + */ + override private[client] def searchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + Source + .unfoldAsync[Option[Array[Object]], Seq[Map[String, Any]]](None) { searchAfterOpt => + retryWithBackoff(config.retryConfig) { + Future { + searchAfterOpt match { + case None => + logger.info( + s"Starting search_after on indices: ${elasticQuery.indices.mkString(", ")}" + ) + case Some(values) => + logger.debug(s"Fetching next search_after batch (after: ${if (values.length > 3) + s"[${values.take(3).mkString(", ")}...]" + else values.mkString(", ")})") + } + + val query = elasticQuery.query + // Create a parser for the query + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + query + ) + val sourceBuilder = + SearchSourceBuilder.fromXContent(xContentParser).size(config.scrollSize) + + // Check if sorts already exist in the query + if (!hasSorts && sourceBuilder.sorts() == null) { + logger.warn( + "No sort fields in query for search_after, adding default _id sort. " + + "This may lead to inconsistent results if documents are updated during scroll." 
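
Both strategies are built on `Source.unfoldAsync`, which threads a cursor (a scroll id for `scrollClassic`, sort values for `searchAfter`) through successive asynchronous fetches and stops when a page comes back empty. The pattern in isolation, with a fake paginated fetch:

```scala
import akka.actor.ActorSystem
import akka.stream.scaladsl.Source
import scala.concurrent.Future

implicit val system: ActorSystem = ActorSystem("unfold-sketch")
import system.dispatcher

// Fake data source: pages of three, exhausted after ten elements.
def fetchPage(offset: Int): Future[Seq[Int]] =
  Future.successful(offset until math.min(offset + 3, 10))

val allRows = Source
  .unfoldAsync(0) { offset =>
    fetchPage(offset).map { page =>
      if (page.isEmpty) None                    // terminates the stream, like an empty batch
      else Some((offset + page.size, page))     // (next cursor, emitted page)
    }
  }
  .mapConcat(identity) // flatten pages into individual rows, as done above
```
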
+ ) + sourceBuilder.sort("_id", SortOrder.ASC) + } else if (hasSorts && sourceBuilder.sorts() != null) { + // Sorts already present, check that a tie-breaker exists + val hasIdSort = sourceBuilder.sorts().asScala.exists { sortBuilder => + sortBuilder match { + case fieldSort: FieldSortBuilder => + fieldSort.getFieldName == "_id" + case _ => + false + } + } + if (!hasIdSort) { + // Add _id as tie-breaker + logger.debug("Adding _id as tie-breaker to existing sorts") + sourceBuilder.sort("_id", SortOrder.ASC) + } + } + + // Add search_after if available + searchAfterOpt.foreach { searchAfter => + sourceBuilder.searchAfter(searchAfter) + } + + // Execute the search + val searchRequest = + new SearchRequest(elasticQuery.indices: _*) + .types(elasticQuery.types: _*) + .source( + sourceBuilder + ) + + val response = apply().search(searchRequest, RequestOptions.DEFAULT) + + if (response.status() != RestStatus.OK) { + throw new IOException(s"Search after failed with status: ${response.status()}") + } + + // Extract ONLY hits (no aggregations for search_after) + val hits = extractHitsOnly(response, fieldAliases) + + if (hits.isEmpty) { + None + } else { + val searchHits = response.getHits.getHits + val lastHit = searchHits.last + val nextSearchAfter = Option(lastHit.getSortValues) + + logger.debug( + s"Retrieved ${hits.size} hits, next search_after: ${nextSearchAfter + .map(arr => + if (arr.length > 3) s"[${arr.take(3).mkString(", ")}...]" + else arr.mkString(", ") + ) + .getOrElse("None")}" + ) + + Some((nextSearchAfter, hits)) + } + } + }(system, logger).recover { case ex: Exception => + logger.error(s"Search after failed after retries: ${ex.getMessage}", ex) + None + } + } + .mapConcat(identity) + } + + /** PIT + search_after for ES 7.10+ + * + * @note + * Requires ES 7.10+. For ES 6.x, use searchAfterSource instead. + */ + private[client] def pitSearchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + + // Open PIT + val pitIdFuture: Future[String] = openPit(elasticQuery.indices, config.keepAlive) + + Source + .futureSource { + pitIdFuture.map { pitId => + logger.info( + s"Opened PIT: ${pitId.take(20)}... for indices: ${elasticQuery.indices.mkString(", ")}" + ) + + Source + .unfoldAsync[Option[Array[Object]], Seq[Map[String, Any]]](None) { searchAfterOpt => + retryWithBackoff(config.retryConfig) { + Future { + searchAfterOpt match { + case None => + logger.info(s"Starting PIT search_after (pitId: ${pitId.take(20)}...)") + case Some(values) => + logger.debug( + s"Fetching next PIT search_after batch (after: ${if (values.length > 3) + s"[${values.take(3).mkString(", ")}...]" + else values.mkString(", ")})" + ) + } + + // Parse query + val xContentParser = XContentType.JSON + .xContent() + .createParser( + namedXContentRegistry, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + elasticQuery.query + ) + + val sourceBuilder = SearchSourceBuilder + .fromXContent(xContentParser) + .size(config.scrollSize) + + // Check if sorts already exist in the query + if (!hasSorts && sourceBuilder.sorts() == null) { + logger.warn( + "No sort fields in query for PIT search_after, adding default _shard_doc sort." 
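
search_after only pages correctly when the sort order is total, which is why the code above appends `_id` as a tie-breaker whenever the query's own sort could leave equal documents ambiguous. A sketch of the two requests involved, using the stock `SearchSourceBuilder` API (index and field names are made up):

```scala
import org.elasticsearch.action.search.SearchRequest
import org.elasticsearch.index.query.QueryBuilders
import org.elasticsearch.search.builder.SearchSourceBuilder
import org.elasticsearch.search.sort.SortOrder

// Page 1: a deterministic sort, with _id as the tie-breaker.
val firstPage = new SearchSourceBuilder()
  .query(QueryBuilders.matchAllQuery())
  .size(1000)
  .sort("createdAt", SortOrder.ASC)
  .sort("_id", SortOrder.ASC)

// Page 2: resume strictly after the last hit's sort values
// (obtained from hit.getSortValues on the previous response).
val nextPage = new SearchSourceBuilder()
  .query(QueryBuilders.matchAllQuery())
  .size(1000)
  .sort("createdAt", SortOrder.ASC)
  .sort("_id", SortOrder.ASC)
  .searchAfter(Array[AnyRef](java.lang.Long.valueOf(1700000000000L), "doc-42"))

val request = new SearchRequest("my-index").source(nextPage)
```
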
+                  )
+                  sourceBuilder.sort("_shard_doc", SortOrder.ASC)
+                } else if (hasSorts && sourceBuilder.sorts() != null) {
+                  // Sorts already present, check that a tie-breaker exists
+                  val hasShardDocSort = sourceBuilder.sorts().asScala.exists {
+                    case fieldSort: FieldSortBuilder =>
+                      fieldSort.getFieldName == "_shard_doc" || fieldSort.getFieldName == "_id"
+                    case _ =>
+                      false
+                  }
+
+                  if (!hasShardDocSort) {
+                    // Add _shard_doc as tie-breaker
+                    logger.debug("Adding _shard_doc as tie-breaker to existing sorts")
+                    sourceBuilder.sort("_shard_doc", SortOrder.ASC)
+                  }
+                }
+
+                // Add search_after
+                searchAfterOpt.foreach { searchAfter =>
+                  sourceBuilder.searchAfter(searchAfter)
+                }
+
+                // Set PIT
+                val pitBuilder = new PointInTimeBuilder(pitId)
+                pitBuilder.setKeepAlive(
+                  TimeValue.parseTimeValue(config.keepAlive, "pit_keep_alive")
+                )
+                sourceBuilder.pointInTimeBuilder(pitBuilder)
+
+                // Build request with PIT
+                val searchRequest = new SearchRequest()
+                  .source(sourceBuilder)
+                  .requestCache(false) // Disable cache for PIT
+
+                val response = apply().search(searchRequest, RequestOptions.DEFAULT)
+
+                if (response.status() != RestStatus.OK) {
+                  throw new IOException(
+                    s"PIT search_after failed with status: ${response.status()}"
+                  )
+                }
+
+                val hits = extractHitsOnly(response, fieldAliases)
+
+                if (hits.isEmpty) {
+                  closePit(pitId)
+                  None
+                } else {
+                  val searchHits = response.getHits.getHits
+                  val lastHit = searchHits.last
+                  val nextSearchAfter = Option(lastHit.getSortValues)
+
+                  logger.debug(s"Retrieved ${hits.size} hits, continuing with PIT")
+                  Some((nextSearchAfter, hits))
+                }
+              }
+            }(system, logger).recover { case ex: Exception =>
+              logger.error(s"PIT search_after failed after retries: ${ex.getMessage}", ex)
+              closePit(pitId)
+              None
+            }
+          }
+          .watchTermination() { (_, done) =>
+            done.onComplete {
+              case scala.util.Success(_) =>
+                logger.info(s"PIT search_after completed, closing PIT: ${pitId.take(20)}...")
+                closePit(pitId)
+              case scala.util.Failure(ex) =>
+                logger.error(
+                  s"PIT search_after failed: ${ex.getMessage}, closing PIT: ${pitId.take(20)}..."
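
`pitSearchAfter` brackets the stream between `openPit` and `closePit` (defined just below), closing the PIT on completion, failure, and early termination alike. The same acquire/use/release shape in a compact, synchronous sketch (`withPit` is a hypothetical helper; import paths assume a recent 7.x client):

```scala
import org.elasticsearch.action.search.{ClosePointInTimeRequest, OpenPointInTimeRequest}
import org.elasticsearch.client.{RequestOptions, RestHighLevelClient}
import org.elasticsearch.core.TimeValue

// Hypothetical bracket: open a PIT, hand its id to the caller, always close it.
def withPit[T](client: RestHighLevelClient, indices: Seq[String])(use: String => T): T = {
  val pitId = client
    .openPointInTime(
      new OpenPointInTimeRequest(indices: _*)
        .keepAlive(TimeValue.parseTimeValue("1m", "pit_keep_alive")),
      RequestOptions.DEFAULT
    )
    .getPointInTimeId
  try use(pitId)
  finally client.closePointInTime(new ClosePointInTimeRequest(pitId), RequestOptions.DEFAULT)
}
```

In the streaming version above, `watchTermination` plays the role of the `finally` block.
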
+ ) + closePit(pitId) + } + NotUsed + } + .mapConcat(identity) + } + } + .mapMaterializedValue(_ => NotUsed) + } + + /** Open PIT (ES 7.10+) + */ + private def openPit(indices: Seq[String], keepAlive: String)(implicit + ec: ExecutionContext + ): Future[String] = { + Future { + logger.debug(s"Opening PIT for indices: ${indices.mkString(", ")}") + + val openPitRequest = new OpenPointInTimeRequest(indices: _*) + .keepAlive(TimeValue.parseTimeValue(keepAlive, "pit_keep_alive")) + + val response = apply().openPointInTime(openPitRequest, RequestOptions.DEFAULT) + val pitId = response.getPointInTimeId + + if (pitId == null || pitId.isEmpty) { + throw new IllegalStateException("PIT ID is null or empty") + } + + logger.info(s"PIT opened: ${pitId.take(20)}...") + pitId + }.recoverWith { case ex: Exception => + logger.error(s"Failed to open PIT: ${ex.getMessage}", ex) + Future.failed( + new IOException(s"Failed to open PIT for indices: ${indices.mkString(", ")}", ex) + ) + } + } + + /** Close PIT + */ + private def closePit(pitId: String): Unit = { + Try { + logger.debug(s"Closing PIT: ${pitId.take(20)}...") + + val closePitRequest = new ClosePointInTimeRequest(pitId) + val response = apply().closePointInTime(closePitRequest, RequestOptions.DEFAULT) + + if (response.isSucceeded) { + logger.info(s"PIT closed successfully: ${pitId.take(20)}...") + } else { + logger.warn(s"PIT close reported failure: ${pitId.take(20)}...") + } + }.recover { case ex: Exception => + logger.warn(s"Failed to close PIT ${pitId.take(20)}...: ${ex.getMessage}") + } + } + + /** Extract ALL results: hits + aggregations This is crucial for queries with aggregations + */ + private def extractAllResults( + response: SearchResponse, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): Seq[Map[String, Any]] = { + val jsonString = response.toString + val sqlResponse = + ElasticResponse("", jsonString, fieldAliases, aggregations.map(kv => kv._1 -> kv._2)) + + parseResponse(sqlResponse) match { + case Success(rows) => + logger.debug(s"Parsed ${rows.size} rows from response") + rows + case Failure(ex) => + logger.error(s"Failed to parse scroll response: ${ex.getMessage}", ex) + Seq.empty + } + } + + /** Extract ONLY hits (for search_after optimization) + */ + private def extractHitsOnly( + response: SearchResponse, + fieldAliases: Map[String, String] + ): Seq[Map[String, Any]] = { + val jsonString = response.toString + val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) + + parseResponse(sqlResponse) match { + case Success(rows) => rows + case Failure(ex) => + logger.error(s"Failed to parse search after response: ${ex.getMessage}", ex) + Seq.empty } } - override implicit def toBulkElasticResult(r: BulkResponse): BulkElasticResult = { - new BulkElasticResult { - override def items: List[BulkElasticResultItem] = - r.getItems.toList.map(toBulkElasticResultItem) + private def clearScroll(scrollId: String): Unit = { + Try { + logger.debug(s"Clearing scroll: $scrollId") + val clearScrollRequest = new ClearScrollRequest() + clearScrollRequest.addScrollId(scrollId) + apply().clearScroll(clearScrollRequest, RequestOptions.DEFAULT) + }.recover { case ex: Exception => + logger.warn(s"Failed to clear scroll $scrollId: ${ex.getMessage}") } } } diff --git a/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientCompanion.scala b/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientCompanion.scala index a2bff72e..13c9e322 100644 --- 
a/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientCompanion.scala +++ b/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientCompanion.scala @@ -16,58 +16,94 @@ package app.softnetwork.elastic.client.rest -import app.softnetwork.elastic.client.ElasticConfig -import org.apache.http.HttpHost +import app.softnetwork.elastic.client.ElasticClientCompanion +import org.elasticsearch.client.{RequestOptions, RestClient, RestClientBuilder, RestHighLevelClient} import org.apache.http.auth.{AuthScope, UsernamePasswordCredentials} import org.apache.http.impl.client.BasicCredentialsProvider -import org.apache.http.impl.nio.client.HttpAsyncClientBuilder -import org.elasticsearch.client.{RestClient, RestClientBuilder, RestHighLevelClient} +import org.elasticsearch.search.SearchModule import org.elasticsearch.common.settings.Settings -import org.elasticsearch.xcontent.NamedXContentRegistry import org.elasticsearch.plugins.SearchPlugin -import org.elasticsearch.search.SearchModule +import org.elasticsearch.xcontent.NamedXContentRegistry import org.slf4j.{Logger, LoggerFactory} -import scala.collection.JavaConverters._ +import scala.jdk.CollectionConverters._ +import scala.util.{Failure, Success, Try} -trait RestHighLevelClientCompanion { +/** Thread-safe companion for RestHighLevelClient with lazy initialization and proper resource + * management + */ +trait RestHighLevelClientCompanion extends ElasticClientCompanion[RestHighLevelClient] { val logger: Logger = LoggerFactory getLogger getClass.getName - def elasticConfig: ElasticConfig - - private var client: Option[RestHighLevelClient] = None - + /** Lazy-initialized NamedXContentRegistry (thread-safe by Scala lazy val) + */ lazy val namedXContentRegistry: NamedXContentRegistry = { -// import scala.jdk.CollectionConverters._ val searchModule = new SearchModule(Settings.EMPTY, false, List.empty[SearchPlugin].asJava) new NamedXContentRegistry(searchModule.getNamedXContents) } - def apply(): RestHighLevelClient = { - client match { - case Some(c) => c - case _ => + /** Create and configure RestHighLevelClient Separated for better testability and error handling + */ + override protected def createClient(): RestHighLevelClient = { + try { + val restClientBuilder = buildRestClient() + new RestHighLevelClient(restClientBuilder) + } catch { + case ex: Exception => + logger.error(s"Failed to create RestHighLevelClient: ${ex.getMessage}", ex) + throw new IllegalStateException("Cannot create Elasticsearch client", ex) + } + } + + /** Build RestClientBuilder with credentials and configuration + */ + private def buildRestClient(): RestClientBuilder = { + val httpHost = parseHttpHost(elasticConfig.credentials.url) + + val builder = RestClient + .builder(httpHost) + .setRequestConfigCallback { requestConfigBuilder => + requestConfigBuilder + .setConnectTimeout(elasticConfig.connectionTimeout.toMillis.toInt) + .setSocketTimeout(elasticConfig.socketTimeout.toMillis.toInt) + } + + // Add credentials if provided + if (elasticConfig.credentials.username.nonEmpty) { + builder.setHttpClientConfigCallback { httpClientBuilder => val credentialsProvider = new BasicCredentialsProvider() - if (elasticConfig.credentials.username.nonEmpty) { - credentialsProvider.setCredentials( - AuthScope.ANY, - new UsernamePasswordCredentials( - elasticConfig.credentials.username, - elasticConfig.credentials.password - ) - ) - } - val restClientBuilder: RestClientBuilder = RestClient - .builder( - HttpHost.create(elasticConfig.credentials.url) 
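
The companion now inherits lazy initialization from `ElasticClientCompanion`; the tests further down exercise `apply()`, `isInitialized` and `close()`. A rough sketch of the contract those tests assume, built on an `AtomicReference` (the base trait itself is defined elsewhere in this PR, so this is an approximation):

```scala
import java.util.concurrent.atomic.AtomicReference

trait ClientCompanionSketch[C <: AutoCloseable] {
  private val ref = new AtomicReference[Option[C]](None)

  protected def createClient(): C

  def apply(): C = ref.get() match {
    case Some(c) => c
    case None =>
      val created = createClient()
      if (ref.compareAndSet(None, Some(created))) created
      else {
        created.close() // another thread won the race; drop the duplicate
        apply()
      }
  }

  def isInitialized: Boolean = ref.get().isDefined

  def close(): Unit = ref.getAndSet(None).foreach(_.close())
}
```
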
- ) - .setHttpClientConfigCallback((httpAsyncClientBuilder: HttpAsyncClientBuilder) => - httpAsyncClientBuilder.setDefaultCredentialsProvider(credentialsProvider) + credentialsProvider.setCredentials( + AuthScope.ANY, + new UsernamePasswordCredentials( + elasticConfig.credentials.username, + elasticConfig.credentials.password ) - val c = new RestHighLevelClient(restClientBuilder) - client = Some(c) - c + ) + httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider) + } + } else { + builder } } + + /** Test connection to Elasticsearch cluster + * @return + * true if connection is successful + */ + override def testConnection(): Boolean = { + Try { + val c = apply() + val response = c.info(RequestOptions.DEFAULT) + logger.info(s"Connected to Elasticsearch ${response.getVersion.getNumber}") + true + } match { + case Success(result) => result + case Failure(ex) => + logger.error(s"Connection test failed: ${ex.getMessage}", ex) + incrementFailures() + false + } + } + } diff --git a/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientHelpers.scala b/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientHelpers.scala new file mode 100644 index 00000000..afa5f20e --- /dev/null +++ b/es7/rest/src/main/scala/app/softnetwork/elastic/client/rest/RestHighLevelClientHelpers.scala @@ -0,0 +1,449 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.rest + +import app.softnetwork.elastic.client.ElasticClientHelpers +import app.softnetwork.elastic.client.result.{ElasticError, ElasticResult} + +import scala.concurrent.Promise +import scala.util.{Failure, Success, Try} + +trait RestHighLevelClientHelpers extends ElasticClientHelpers { _: RestHighLevelClientCompanion => + + // ======================================================================== + // GENERIC METHODS FOR EXECUTING REST HIGH LEVEL CLIENT ACTIONS + // ======================================================================== + + //format:off + /** Execute a Rest High Level Client action with a generic transformation of the result. 
+ * + * @tparam Req + * type of the request + * @tparam Resp + * type of the response + * @tparam T + * type of the desired final result + * @param operation + * name of the operation (for logging and error context) + * @param index + * relevant index (optional, for logging) + * @param retryable + * true if the operation can be retried in case of a transient error + * @param request + * the request to be executed + * @param executor + * function executing the request and returning the response + * @param transformer + * function transforming the response into T + * @return + * ElasticResult[T] + * + * @example + * {{{ + * executeRestAction[CreateIndexRequest, CreateIndexResponse, Boolean]( + * operation = "createIndex", + * index = Some("my-index"), + * retryable = false + * )( + * request = new CreateIndexRequest("my-index") + * )( + * executor = req => apply().indices().create(req, RequestOptions.DEFAULT) + * )( + * transformer = resp => resp.isAcknowledged + * ) + * }}} + */ + //format:on + private[client] def executeRestAction[Req, Resp, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => Req + )( + executor: Req => Resp + )( + transformer: Resp => T + ): ElasticResult[T] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + logger.debug(s"Executing operation '$operation'$indexStr") + + // ✅ Execution with exception handling + val tryResult: Try[Resp] = Try { + executor(request) + } + + // ✅ Conversion to ElasticResult[Resp] + val elasticResult: ElasticResult[Resp] = tryResult match { + case Success(result) => + ElasticResult.success(result) + case Failure(ex: org.elasticsearch.ElasticsearchException) => + // Extract status code from Elasticsearch exception + val statusCode = Option(ex.status()).map(_.getStatus) + logger.error( + s"Elasticsearch exception during operation '$operation'$indexStr: ${ex.getMessage}", + ex + ) + ElasticResult.failure( + ElasticError( + message = s"Elasticsearch error during $operation: ${ex.getDetailedMessage}", + cause = Some(ex), + statusCode = statusCode, + operation = Some(operation) + ) + ) + case Failure(ex) => + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + ElasticResult.failure( + ElasticError( + message = s"Exception during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + + // ✅ Apply transformation + elasticResult.flatMap { result => + Try(transformer(result)) match { + case Success(transformed) => + logger.debug(s"Operation '$operation'$indexStr succeeded") + ElasticResult.success(transformed) + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + ElasticResult.failure( + ElasticError( + message = s"Failed to transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + } + } + + /** Simplified variant for operations returning Boolean values (acknowledged). 
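
`executeRestBooleanAction` (continuing below) collapses any `AcknowledgedResponse` to its acknowledged flag. A hypothetical call site, assuming it runs inside the helpers trait where `apply()` and `ElasticResult` are in scope:

```scala
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest
import org.elasticsearch.action.support.master.AcknowledgedResponse
import org.elasticsearch.client.RequestOptions

// "acknowledged": true in the response becomes ElasticResult.success(true).
val deleted: ElasticResult[Boolean] =
  executeRestBooleanAction[DeleteIndexRequest, AcknowledgedResponse](
    operation = "deleteIndex",
    index = Some("my-index"),
    retryable = false
  )(
    new DeleteIndexRequest("my-index")
  )(
    req => apply().indices().delete(req, RequestOptions.DEFAULT)
  )
```
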
+ * + * @param operation + * name of the operation + * @param index + * index concerned (optional) + * @param retryable + * true if retryable + * @param request + * the request to be executed + * @param executor + * function executing the request + * @return + * ElasticResult[Boolean] + */ + private[client] def executeRestBooleanAction[ + Req, + Resp <: org.elasticsearch.action.support.master.AcknowledgedResponse + ]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => Req + )( + executor: Req => Resp + ): ElasticResult[Boolean] = { + executeRestAction[Req, Resp, Boolean](operation, index, retryable)(request)(executor)( + _.isAcknowledged + ) + } + + //format:off + /** Variant to execute an action using the low-level REST client. Useful for operations not + * supported by the high-level client. + * + * @tparam T + * type of the final result + * @param operation + * name of the operation + * @param index + * index concerned (optional) + * @param retryable + * true if retryable + * @param request + * the low-level Request + * @param transformer + * function transforming the Response into T + * @return + * ElasticResult[T] + * + * @example + * {{{ + * executeRestLowLevelAction[String]( + * operation = "customEndpoint", + * index = Some("my-index") + * )( + * request = new Request("GET", "/my-index/_custom") + * )( + * transformer = resp => EntityUtils.toString(resp.getEntity) + * ) + * }}} + */ + //format:on + private[client] def executeRestLowLevelAction[T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => org.elasticsearch.client.Request + )( + transformer: org.elasticsearch.client.Response => T + ): ElasticResult[T] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + logger.debug(s"Executing low-level operation '$operation'$indexStr") + + // ✅ Execution with exception handling + val tryResult: Try[org.elasticsearch.client.Response] = Try { + apply().getLowLevelClient.performRequest(request) + } + + // ✅ Conversion to ElasticResult[Response] + val elasticResult: ElasticResult[org.elasticsearch.client.Response] = tryResult match { + case Success(result) => + ElasticResult.success(result) + case Failure(ex: org.elasticsearch.client.ResponseException) => + val statusCode = Some(ex.getResponse.getStatusLine.getStatusCode) + logger.error( + s"Response exception during operation '$operation'$indexStr: ${ex.getMessage}", + ex + ) + ElasticResult.failure( + ElasticError( + message = s"HTTP error during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = statusCode, + operation = Some(operation) + ) + ) + case Failure(ex) => + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + ElasticResult.failure( + ElasticError( + message = s"Exception during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + + // ✅ Check status and apply transformation + elasticResult.flatMap { result => + val statusCode = result.getStatusLine.getStatusCode + + if (statusCode >= 200 && statusCode < 300) { + // ✅ Success: applying the transformation + Try(transformer(result)) match { + case Success(transformed) => + logger.debug(s"Operation '$operation'$indexStr succeeded with status $statusCode") + ElasticResult.success(transformed) + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + ElasticResult.failure( + ElasticError( + message = s"Failed to 
transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = Some(statusCode), + operation = Some(operation) + ) + ) + } + } else { + // ✅ Failure: extract the error + val errorMessage = Option(result.getStatusLine.getReasonPhrase) + .filter(_.nonEmpty) + .getOrElse("Unknown error") + + val error = ElasticError( + message = errorMessage, + cause = None, + statusCode = Some(statusCode), + operation = Some(operation) + ) + + logError(operation, indexStr, error) + ElasticResult.failure(error) + } + } + } + + //format:off + /** Asynchronous variant to execute a Rest High Level Client action. + * + * @tparam Req + * type of the request + * @tparam Resp + * type of the response + * @tparam T + * type of the desired final result + * @param operation + * name of the operation + * @param index + * relevant index (optional) + * @param retryable + * true if retryable + * @param request + * the request to be executed + * @param executor + * function executing the request asynchronously + * @param transformer + * function transforming the response into T + * @return + * Future[ElasticResult[T]] + * + * @example + * {{{ + * executeAsyncRestAction[IndexRequest, IndexResponse, String]( + * operation = "indexDocument", + * index = Some("my-index") + * )( + * request = new IndexRequest("my-index").source(...) + * )( + * executor = (req, listener) => apply().indexAsync(req, RequestOptions.DEFAULT, listener) + * )( + * transformer = resp => resp.getId + * ) + * }}} + */ + //format:on + private[client] def executeAsyncRestAction[Req, Resp, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => Req + )( + executor: (Req, org.elasticsearch.action.ActionListener[Resp]) => Unit + )( + transformer: Resp => T + )(implicit ec: scala.concurrent.ExecutionContext): scala.concurrent.Future[ElasticResult[T]] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + logger.debug(s"Executing operation '$operation'$indexStr asynchronously") + + val promise: Promise[ElasticResult[T]] = Promise() + + try { + val listener = new org.elasticsearch.action.ActionListener[Resp] { + override def onResponse(response: Resp): Unit = { + logger.debug(s"Operation '$operation'$indexStr succeeded asynchronously") + + // ✅ Success: applying the transformation + Try(transformer(response)) match { + case Success(transformed) => + promise.success(ElasticResult.success(transformed)) + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + promise.success( + ElasticResult.failure( + ElasticError( + message = s"Failed to transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + ) + } + } + + override def onFailure(ex: Exception): Unit = { + val (message, statusCode) = ex match { + case esEx: org.elasticsearch.ElasticsearchException => + ( + s"Elasticsearch error during $operation: ${esEx.getDetailedMessage}", + Option(esEx.status()).map(_.getStatus) + ) + case _ => + (s"Exception during $operation: ${ex.getMessage}", None) + } + + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + + promise.success( + ElasticResult.failure( + ElasticError( + message = message, + cause = Some(ex), + statusCode = statusCode, + operation = Some(operation) + ) + ) + ) + } + } + + executor(request, listener) + } catch { + case ex: Exception => + logger.error(s"Failed to initiate async operation '$operation'$indexStr", ex) + promise.success( + 
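
At its core, `executeAsyncRestAction` is a callback-to-`Future` bridge over the client's `ActionListener`. The bridge in isolation:

```scala
import org.elasticsearch.action.ActionListener
import scala.concurrent.{Future, Promise}

// Adapts any *Async method of the client to a Future; the surrounding code
// then layers ElasticResult conversion and logging on top of this.
def toFuture[Resp](register: ActionListener[Resp] => Unit): Future[Resp] = {
  val promise = Promise[Resp]()
  register(new ActionListener[Resp] {
    override def onResponse(response: Resp): Unit = promise.success(response)
    override def onFailure(e: Exception): Unit = promise.failure(e)
  })
  promise.future
}

// e.g. toFuture[SearchResponse](l => apply().searchAsync(request, RequestOptions.DEFAULT, l))
```
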
ElasticResult.failure( + ElasticError( + message = s"Failed to initiate $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + ) + } + + promise.future + } + + /** Simplified asynchronous variant for operations returning Boolean values. + * + * @param operation + * name of the operation + * @param index + * index concerned (optional) + * @param retryable + * true if retryable + * @param request + * the request to be executed + * @param executor + * function executing the request asynchronously + * @return + * Future of ElasticResult[Boolean] + */ + private[client] def executeAsyncRestBooleanAction[ + Req, + Resp <: org.elasticsearch.action.support.master.AcknowledgedResponse + ]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + request: => Req + )( + executor: (Req, org.elasticsearch.action.ActionListener[Resp]) => Unit + )(implicit + ec: scala.concurrent.ExecutionContext + ): scala.concurrent.Future[ElasticResult[Boolean]] = { + executeAsyncRestAction[Req, Resp, Boolean](operation, index, retryable)(request)(executor)( + _.isAcknowledged + ) + } +} diff --git a/es7/rest/src/main/scala/app/softnetwork/elastic/client/spi/RestHighLevelClientSpi.scala b/es7/rest/src/main/scala/app/softnetwork/elastic/client/spi/RestHighLevelClientSpi.scala new file mode 100644 index 00000000..9f17cb37 --- /dev/null +++ b/es7/rest/src/main/scala/app/softnetwork/elastic/client/spi/RestHighLevelClientSpi.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.spi + +import app.softnetwork.elastic.client.ElasticClientApi +import app.softnetwork.elastic.client.rest.RestHighLevelClientApi +import com.typesafe.config.Config + +class RestHighLevelClientSpi extends ElasticClientSpi { + + //format:off + /** Creates an Elasticsearch client instance. 
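
`RestHighLevelClientSpi`, defined here, is wired up through the JVM service-loader mechanism; the es8 module registers its own `JavaClientSpi` the same way via `META-INF/services` (visible later in this diff). A sketch of how an application might discover whichever implementation is on the classpath:

```scala
import java.util.ServiceLoader
import scala.jdk.CollectionConverters._

import app.softnetwork.elastic.client.spi.ElasticClientSpi
import com.typesafe.config.ConfigFactory

// Picks the first registered SPI; with a single client module on the classpath
// (es6, es7 or es8), that is the version-specific implementation.
val spi: ElasticClientSpi =
  ServiceLoader.load(classOf[ElasticClientSpi]).iterator().asScala.next()

val client = spi.client(ConfigFactory.load("softnetwork-elastic.conf"))
```
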
+ * + * @param conf + * Typesafe configuration containing Elasticsearch parameters + * @return + * Configured ElasticClientApi instance + * @example + * {{{ + * class MyElasticClientProvider extends ElasticClientSpi { + * override def client(config: Config): ElasticClientApi = { + * new MyElasticClientImpl(config) + * } + * } + * }}} + */ + //format:on + override def client(conf: Config): ElasticClientApi = + new RestHighLevelClientApi { + override def config: Config = conf + } +} diff --git a/es7/rest/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithRestProvider.scala b/es7/rest/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithRestProvider.scala deleted file mode 100644 index 2e6eee32..00000000 --- a/es7/rest/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithRestProvider.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2025 SOFTNETWORK - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package app.softnetwork.elastic.persistence.query - -import app.softnetwork.persistence.message.CrudEvent -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.persistence.query.{JournalProvider, OffsetProvider} - -trait State2ElasticProcessorStreamWithRestProvider[T <: Timestamped, E <: CrudEvent] - extends State2ElasticProcessorStream[T, E] - with RestHighLevelClientProvider[T] { _: JournalProvider with OffsetProvider => } diff --git a/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientCompanionSpec.scala b/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientCompanionSpec.scala new file mode 100644 index 00000000..532ffad9 --- /dev/null +++ b/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientCompanionSpec.scala @@ -0,0 +1,122 @@ +package app.softnetwork.elastic.client + +import akka.actor.ActorSystem +import app.softnetwork.elastic.client.rest.RestHighLevelClientCompanion +import app.softnetwork.elastic.scalatest.ElasticDockerTestKit +import app.softnetwork.persistence.generateUUID +import com.typesafe.config.ConfigFactory +import configs.ConfigReader +import org.scalatest.concurrent.ScalaFutures +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec +import org.slf4j.{Logger, LoggerFactory} + +import java.util.concurrent.TimeUnit +import scala.concurrent.duration.Duration +import scala.concurrent.{Await, ExecutionContextExecutor, Future} +import scala.util.Try + +class RestHighLevelClientCompanionSpec + extends AnyWordSpec + with ElasticDockerTestKit + with Matchers + with ScalaFutures { + + lazy val log: Logger = LoggerFactory getLogger getClass.getName + + implicit val system: ActorSystem = ActorSystem(generateUUID()) + + implicit val executionContext: ExecutionContextExecutor = system.dispatcher + + override def afterAll(): Unit = { + Await.result(system.terminate(), Duration(30, TimeUnit.SECONDS)) + super.afterAll() + } + + 
"RestHighLevelClientCompanion" should { + + "initialize client lazily" in { + val companion = TestCompanion() + companion.isInitialized shouldBe false + + val client = companion.apply() + client should not be null + companion.isInitialized shouldBe true + } + + "return same instance on multiple calls" in { + val companion = TestCompanion() + val client1 = companion.apply() + val client2 = companion.apply() + + client1 should be theSameInstanceAs client2 + } + + "be thread-safe during initialization" in { + val companion = TestCompanion() + val futures = (1 to 100).map { _ => + Future { + companion.apply() + } + } + + val clients = Future.sequence(futures).futureValue + + // Tous les clients doivent être la même instance + clients.distinct.size shouldBe 1 + } + + "close client properly" in { + val companion = TestCompanion() + companion.apply() + companion.isInitialized shouldBe true + + companion.close() + companion.isInitialized shouldBe false + } + + "handle invalid URL gracefully" in { + val companion = TestCompanion("invalid-url") + + Try(an[IllegalArgumentException] should be thrownBy { + companion.apply() + }) + } + + "test connection successfully" in { + val companion = TestCompanion() + companion.testConnection() shouldBe true + } + } + + case class TestCompanion(config: ElasticConfig) extends RestHighLevelClientCompanion { + override def elasticConfig: ElasticConfig = config + } + + object TestCompanion { + def apply(): TestCompanion = TestCompanion( + ConfigReader[ElasticConfig] + .read(elasticConfig.withFallback(ConfigFactory.load("softnetwork-elastic.conf")), "elastic") + .toEither match { + case Left(configError) => + throw configError.configException + case Right(r) => r + } + ) + + def apply(url: String): TestCompanion = TestCompanion( + ConfigReader[ElasticConfig] + .read( + ConfigFactory + .parseString(elasticConfigAsString) + .withFallback(ConfigFactory.load("softnetwork-elastic.conf")), + "elastic" + ) + .toEither match { + case Left(configError) => + throw configError.configException + case Right(r) => r.copy(credentials = ElasticCredentials(url)) + } + ) + } +} diff --git a/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientSpec.scala b/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientSpec.scala index b8aeba67..9ea35a61 100644 --- a/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientSpec.scala +++ b/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelClientSpec.scala @@ -1,28 +1,3 @@ package app.softnetwork.elastic.client -import app.softnetwork.elastic.client.RestHighLevelProviders.{ - BinaryProvider, - ParentProvider, - PersonProvider, - SampleProvider -} -import app.softnetwork.elastic.model.{Binary, Parent, Sample} -import app.softnetwork.elastic.persistence.query.ElasticProvider -import app.softnetwork.persistence.person.model.Person - -class RestHighLevelClientSpec extends ElasticClientSpec { - - lazy val pClient: ElasticProvider[Person] with ElasticClientApi = new PersonProvider( - elasticConfig - ) - lazy val sClient: ElasticProvider[Sample] with ElasticClientApi = new SampleProvider( - elasticConfig - ) - lazy val bClient: ElasticProvider[Binary] with ElasticClientApi = new BinaryProvider( - elasticConfig - ) - - override def parentClient: ElasticProvider[Parent] with ElasticClientApi = new ParentProvider( - elasticConfig - ) -} +class RestHighLevelClientSpec extends ElasticClientSpec diff --git 
a/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelProviders.scala b/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelProviders.scala deleted file mode 100644 index 4356c541..00000000 --- a/es7/rest/src/test/scala/app/softnetwork/elastic/client/RestHighLevelProviders.scala +++ /dev/null @@ -1,51 +0,0 @@ -package app.softnetwork.elastic.client - -import app.softnetwork.elastic.model.{Binary, Parent, Sample} -import app.softnetwork.elastic.persistence.query.RestHighLevelClientProvider -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.person.model.Person -import com.typesafe.config.Config -import org.elasticsearch.client.RestHighLevelClient - -object RestHighLevelProviders { - - class PersonProvider(es: Config) - extends RestHighLevelClientProvider[Person] - with ManifestWrapper[Person] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val restHighLevelClient: RestHighLevelClient = apply() - } - - class SampleProvider(es: Config) - extends RestHighLevelClientProvider[Sample] - with ManifestWrapper[Sample] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val restHighLevelClient: RestHighLevelClient = apply() - } - - class BinaryProvider(es: Config) - extends RestHighLevelClientProvider[Binary] - with ManifestWrapper[Binary] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val restHighLevelClient: RestHighLevelClient = apply() - } - - class ParentProvider(es: Config) - extends RestHighLevelClientProvider[Parent] - with ManifestWrapper[Parent] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val restHighLevelClient: RestHighLevelClient = apply() - } -} diff --git a/es7/rest/src/test/scala/app/softnetwork/elastic/persistence/person/RestHighLevelClientPersonHandlerSpec.scala b/es7/rest/src/test/scala/app/softnetwork/elastic/persistence/person/RestHighLevelClientPersonHandlerSpec.scala index 4e3efbe5..35f469f8 100644 --- a/es7/rest/src/test/scala/app/softnetwork/elastic/persistence/person/RestHighLevelClientPersonHandlerSpec.scala +++ b/es7/rest/src/test/scala/app/softnetwork/elastic/persistence/person/RestHighLevelClientPersonHandlerSpec.scala @@ -1,37 +1,3 @@ package app.softnetwork.elastic.persistence.person -import akka.actor.typed.ActorSystem -import app.softnetwork.elastic.client.rest.RestHighLevelClientApi -import app.softnetwork.elastic.persistence.person.ElasticPersonTestKit -import app.softnetwork.elastic.persistence.query.{ElasticProvider, PersonToElasticProcessorStream} -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.person.model.Person -import app.softnetwork.persistence.person.query.PersonToExternalProcessorStream -import app.softnetwork.persistence.query.ExternalPersistenceProvider -import com.typesafe.config.Config -import org.slf4j.{Logger, LoggerFactory} - -import scala.concurrent.ExecutionContextExecutor - -class RestHighLevelClientPersonHandlerSpec extends ElasticPersonTestKit { - - implicit val ec: ExecutionContextExecutor = typedSystem().executionContext - - override def externalPersistenceProvider: ExternalPersistenceProvider[Person] = - new ElasticProvider[Person] with RestHighLevelClientApi with ManifestWrapper[Person] { - override protected val 
manifestWrapper: ManifestW = ManifestW() - override lazy val config: Config = RestHighLevelClientPersonHandlerSpec.this.elasticConfig - } - - override def person2ExternalProcessorStream: ActorSystem[_] => PersonToExternalProcessorStream = - sys => - new PersonToElasticProcessorStream with RestHighLevelClientApi { - override val forTests: Boolean = true - override protected val manifestWrapper: ManifestW = ManifestW() - override implicit def system: ActorSystem[_] = sys - override def log: Logger = LoggerFactory getLogger getClass.getName - override lazy val config: Config = RestHighLevelClientPersonHandlerSpec.this.elasticConfig - } - - override def log: Logger = LoggerFactory getLogger getClass.getName -} +class RestHighLevelClientPersonHandlerSpec extends ElasticClientPersonHandlerSpec diff --git a/es8/java/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi b/es8/java/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi new file mode 100644 index 00000000..e4b5ab16 --- /dev/null +++ b/es8/java/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi @@ -0,0 +1 @@ +app.softnetwork.elastic.client.spi.JavaClientSpi \ No newline at end of file diff --git a/es8/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientApi.scala b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientApi.scala deleted file mode 100644 index c13f2ee7..00000000 --- a/es8/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientApi.scala +++ /dev/null @@ -1,1017 +0,0 @@ -/* - * Copyright 2025 SOFTNETWORK - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package app.softnetwork.elastic.client.java - -import akka.NotUsed -import akka.actor.ActorSystem -import akka.stream.scaladsl.Flow -import app.softnetwork.elastic.client._ -import app.softnetwork.elastic.sql.bridge._ -import app.softnetwork.elastic.sql.query.{SQLQuery, SQLSearchRequest} -import app.softnetwork.elastic.{client, sql} -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.serialization.serialization -import co.elastic.clients.elasticsearch.core.bulk.{ - BulkOperation, - BulkResponseItem, - DeleteOperation, - IndexOperation, - UpdateAction, - UpdateOperation -} -import co.elastic.clients.elasticsearch.core.msearch.{ - MultisearchBody, - MultisearchHeader, - RequestItem -} -import co.elastic.clients.elasticsearch.core._ -import co.elastic.clients.elasticsearch.core.reindex.{Destination, Source} -import co.elastic.clients.elasticsearch.indices.update_aliases.{Action, AddAction, RemoveAction} -import co.elastic.clients.elasticsearch.indices.{ExistsRequest => IndexExistsRequest, _} -import co.elastic.clients.json.jackson.JacksonJsonpMapper -import com.google.gson.{Gson, JsonParser} - -import _root_.java.io.{StringReader, StringWriter} -import _root_.java.util.{Map => JMap} -import scala.collection.JavaConverters._ -//import scala.jdk.CollectionConverters._ -import org.json4s.Formats - -import scala.concurrent.{ExecutionContext, Future, Promise} -import scala.language.implicitConversions -import scala.util.{Failure, Success, Try} - -trait ElasticsearchClientApi - extends ElasticClientApi - with ElasticsearchClientIndicesApi - with ElasticsearchClientAliasApi - with ElasticsearchClientSettingsApi - with ElasticsearchClientMappingApi - with ElasticsearchClientRefreshApi - with ElasticsearchClientFlushApi - with ElasticsearchClientCountApi - with ElasticsearchClientSingleValueAggregateApi - with ElasticsearchClientIndexApi - with ElasticsearchClientUpdateApi - with ElasticsearchClientDeleteApi - with ElasticsearchClientGetApi - with ElasticsearchClientSearchApi - with ElasticsearchClientBulkApi - -trait ElasticsearchClientIndicesApi extends IndicesApi with ElasticsearchClientCompanion { - override def createIndex(index: String, settings: String): Boolean = { - tryOrElse( - apply() - .indices() - .create( - new CreateIndexRequest.Builder() - .index(index) - .settings(new IndexSettings.Builder().withJson(new StringReader(settings)).build()) - .build() - ) - .acknowledged(), - false - )(logger) - } - - override def deleteIndex(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .delete(new DeleteIndexRequest.Builder().index(index).build()) - .acknowledged(), - false - )(logger) - } - - override def openIndex(index: String): Boolean = { - tryOrElse( - apply().indices().open(new OpenRequest.Builder().index(index).build()).acknowledged(), - false - )(logger) - } - - override def closeIndex(index: String): Boolean = { - tryOrElse( - apply().indices().close(new CloseIndexRequest.Builder().index(index).build()).acknowledged(), - false - )(logger) - } - - override def reindex( - sourceIndex: String, - targetIndex: String, - refresh: Boolean = true - ): Boolean = { - val failures = apply() - .reindex( - new ReindexRequest.Builder() - .source(new Source.Builder().index(sourceIndex).build()) - .dest(new Destination.Builder().index(targetIndex).build()) - .refresh(refresh) - .build() - ) - .failures() - .asScala - .map(_.cause().reason()) - if (failures.nonEmpty) { - logger.error( - s"Reindexing from $sourceIndex to $targetIndex failed with errors: 
${failures.take(100).mkString(", ")}" - ) - } - failures.isEmpty - } - - override def indexExists(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .exists( - new IndexExistsRequest.Builder().index(index).build() - ) - .value(), - false - )(logger) - } -} - -trait ElasticsearchClientAliasApi extends AliasApi with ElasticsearchClientCompanion { - override def addAlias(index: String, alias: String): Boolean = { - tryOrElse( - apply() - .indices() - .updateAliases( - new UpdateAliasesRequest.Builder() - .actions( - new Action.Builder() - .add(new AddAction.Builder().index(index).alias(alias).build()) - .build() - ) - .build() - ) - .acknowledged(), - false - )(logger) - } - - override def removeAlias(index: String, alias: String): Boolean = { - tryOrElse( - apply() - .indices() - .updateAliases( - new UpdateAliasesRequest.Builder() - .actions( - new Action.Builder() - .remove(new RemoveAction.Builder().index(index).alias(alias).build()) - .build() - ) - .build() - ) - .acknowledged(), - false - )(logger) - } -} - -trait ElasticsearchClientSettingsApi extends SettingsApi with ElasticsearchClientCompanion { - _: ElasticsearchClientIndicesApi => - - override def updateSettings(index: String, settings: String): Boolean = { - tryOrElse( - apply() - .indices() - .putSettings( - new PutIndicesSettingsRequest.Builder() - .index(index) - .settings(new IndexSettings.Builder().withJson(new StringReader(settings)).build()) - .build() - ) - .acknowledged(), - false - )(logger) - } - - override def loadSettings(index: String): String = { - tryOrElse( - Option( - apply() - .indices() - .getSettings( - new GetIndicesSettingsRequest.Builder().index(index).build() - ) - .result() - .get(index) - ).map { value => - val mapper = new JacksonJsonpMapper() - val writer = new StringWriter() - val generator = mapper.jsonProvider().createGenerator(writer) - mapper.serialize(value.settings().index(), generator) - generator.close() - writer.toString - }, - None - )(logger).getOrElse("{}") - } -} - -trait ElasticsearchClientMappingApi - extends MappingApi - with ElasticsearchClientIndicesApi - with ElasticsearchClientRefreshApi - with ElasticsearchClientCompanion { - override def setMapping(index: String, mapping: String): Boolean = { - tryOrElse( - apply() - .indices() - .putMapping( - new PutMappingRequest.Builder().index(index).withJson(new StringReader(mapping)).build() - ) - .acknowledged(), - false - )(logger) - } - - override def getMapping(index: String): String = { - tryOrElse( - { - Option( - apply() - .indices() - .getMapping( - new GetMappingRequest.Builder().index(index).build() - ) - .result() - .get(index) - ).map { value => - val mapper = new JacksonJsonpMapper() - val writer = new StringWriter() - val generator = mapper.jsonProvider().createGenerator(writer) - mapper.serialize(value, generator) - generator.close() - writer.toString - } - }, - None - )(logger).getOrElse(s""""{$index: {"mappings": {}}}""") - } -} - -trait ElasticsearchClientRefreshApi extends RefreshApi with ElasticsearchClientCompanion { - override def refresh(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .refresh( - new RefreshRequest.Builder().index(index).build() - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } -} - -trait ElasticsearchClientFlushApi extends FlushApi with ElasticsearchClientCompanion { - override def flush(index: String, force: Boolean = true, wait: Boolean = true): Boolean = { - tryOrElse( - apply() - .indices() - .flush( - new 
FlushRequest.Builder().index(index).force(force).waitIfOngoing(wait).build() - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } -} - -trait ElasticsearchClientCountApi extends CountApi with ElasticsearchClientCompanion { - override def count(query: client.JSONQuery): Option[Double] = { - tryOrElse( - Option( - apply() - .count( - new CountRequest.Builder().index(query.indices.asJava).build() - ) - .count() - .toDouble - ), - None - )(logger) - } - - override def countAsync(query: client.JSONQuery)(implicit - ec: ExecutionContext - ): Future[Option[Double]] = { - fromCompletableFuture( - async() - .count( - new CountRequest.Builder().index(query.indices.asJava).build() - ) - ).map(response => Option(response.count().toDouble)) - } -} - -trait ElasticsearchClientSingleValueAggregateApi - extends SingleValueAggregateApi - with ElasticsearchClientCountApi { - private[this] def aggregateValue(value: Double, valueAsString: String): AggregateValue = - if (valueAsString.nonEmpty) StringValue(valueAsString) - else NumericValue(value) - - override def aggregate( - sqlQuery: SQLQuery - )(implicit ec: ExecutionContext): Future[Seq[SingleValueAggregateResult]] = { - val aggregations: Seq[ElasticAggregation] = sqlQuery - val futures = for (aggregation <- aggregations) yield { - val promise: Promise[SingleValueAggregateResult] = Promise() - val field = aggregation.field - val sourceField = aggregation.sourceField - val aggType = aggregation.aggType - val aggName = aggregation.aggName - val query = aggregation.query.getOrElse("") - val sources = aggregation.sources - sourceField match { - case "_id" if aggType.sql == "count" => - countAsync( - JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ) - ).onComplete { - case Success(result) => - promise.success( - SingleValueAggregateResult( - field, - aggType, - NumericValue(result.getOrElse(0d)), - None - ) - ) - case Failure(f) => - logger.error(f.getMessage, f.fillInStackTrace()) - promise.success( - SingleValueAggregateResult(field, aggType, EmptyValue, Some(f.getMessage)) - ) - } - promise.future - case _ => - val jsonQuery = JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ) - import jsonQuery._ - logger.info( - s"Aggregating with query: ${jsonQuery.query} on indices: ${indices.mkString(", ")}" - ) - // Create a parser for the query - Try( - apply().search( - new SearchRequest.Builder() - .index(indices.asJava) - .withJson( - new StringReader(jsonQuery.query) - ) - .build() - ) - ) match { - case Success(response) => - logger.debug( - s"Aggregation response: ${response.toString}" - ) - val agg = aggName.split("\\.").last - - val itAgg = aggName.split("\\.").iterator - - var root = - if (aggregation.nested) { - response.aggregations().get(itAgg.next()).nested().aggregations() - } else { - response.aggregations() - } - - if (aggregation.filtered) { - root = root.get(itAgg.next()).filter().aggregations() - } - - promise.success( - SingleValueAggregateResult( - field, - aggType, - aggType match { - case sql.function.aggregate.COUNT => - NumericValue( - if (aggregation.distinct) { - root.get(agg).cardinality().value().toDouble - } else { - root.get(agg).valueCount().value() - } - ) - case sql.function.aggregate.SUM => - NumericValue(root.get(agg).sum().value()) - case sql.function.aggregate.AVG => - val avgAgg = root.get(agg).avg() - aggregateValue(avgAgg.value(), avgAgg.valueAsString()) - case sql.function.aggregate.MIN => - val 
minAgg = root.get(agg).min() - aggregateValue(minAgg.value(), minAgg.valueAsString()) - case sql.function.aggregate.MAX => - val maxAgg = root.get(agg).max() - aggregateValue(maxAgg.value(), maxAgg.valueAsString()) - case _ => EmptyValue - }, - None - ) - ) - case Failure(exception) => - logger.error(s"Failed to execute search for aggregation: $aggName", exception) - promise.success( - SingleValueAggregateResult( - field, - aggType, - EmptyValue, - Some(exception.getMessage) - ) - ) - } - promise.future - } - } - Future.sequence(futures) - } -} - -trait ElasticsearchClientIndexApi extends IndexApi with ElasticsearchClientCompanion { - _: ElasticsearchClientRefreshApi => - override def index(index: String, id: String, source: String): Boolean = { - tryOrElse( - apply() - .index( - new IndexRequest.Builder() - .index(index) - .id(id) - .withJson(new StringReader(source)) - .build() - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } - - override def indexAsync(index: String, id: String, source: String)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - fromCompletableFuture( - async() - .index( - new IndexRequest.Builder() - .index(index) - .id(id) - .withJson(new StringReader(source)) - .build() - ) - ).flatMap { response => - if (response.shards().failed().intValue() == 0) { - Future.successful(true) - } else { - Future.failed(new Exception(s"Failed to index document with id: $id in index: $index")) - } - } - } -} - -trait ElasticsearchClientUpdateApi extends UpdateApi with ElasticsearchClientCompanion { - _: ElasticsearchClientRefreshApi => - override def update( - index: String, - id: String, - source: String, - upsert: Boolean - ): Boolean = { - tryOrElse( - apply() - .update( - new UpdateRequest.Builder[JMap[String, Object], JMap[String, Object]]() - .index(index) - .id(id) - .doc(mapper.readValue(source, classOf[JMap[String, Object]])) - .docAsUpsert(upsert) - .build(), - classOf[JMap[String, Object]] - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } - - override def updateAsync(index: String, id: String, source: String, upsert: Boolean)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - fromCompletableFuture( - async() - .update( - new UpdateRequest.Builder[JMap[String, Object], JMap[String, Object]]() - .index(index) - .id(id) - .doc(mapper.readValue(source, classOf[JMap[String, Object]])) - .docAsUpsert(upsert) - .build(), - classOf[JMap[String, Object]] - ) - ).flatMap { response => - if (response.shards().failed().intValue() == 0) { - Future.successful(true) - } else { - Future.failed(new Exception(s"Failed to update document with id: $id in index: $index")) - } - } - } -} - -trait ElasticsearchClientDeleteApi extends DeleteApi with ElasticsearchClientCompanion { - _: ElasticsearchClientRefreshApi => - - override def delete(uuid: String, index: String): Boolean = { - tryOrElse( - apply() - .delete( - new DeleteRequest.Builder().index(index).id(uuid).build() - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } - - override def deleteAsync(uuid: String, index: String)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - fromCompletableFuture( - async() - .delete( - new DeleteRequest.Builder().index(index).id(uuid).build() - ) - ).flatMap { response => - if (response.shards().failed().intValue() == 0) { - Future.successful(true) - } else { - Future.failed(new Exception(s"Failed to delete document with id: $uuid in index: $index")) - } - } - } - -} - -trait ElasticsearchClientGetApi extends GetApi with 
ElasticsearchClientCompanion { - - def get[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], formats: Formats): Option[U] = { - Try( - apply().get( - new GetRequest.Builder() - .index( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ) - ) - .id(id) - .build(), - classOf[JMap[String, Object]] - ) - ) match { - case Success(response) => - if (response.found()) { - val source = mapper.writeValueAsString(response.source()) - logger.debug(s"Deserializing response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}") - // Deserialize the source string to the expected type - // Note: This assumes that the source is a valid JSON representation of U - // and that the serialization library is capable of handling it. - Try(serialization.read[U](source)) match { - case Success(value) => Some(value) - case Failure(f) => - logger.error( - s"Failed to deserialize response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) - None - } - } else { - None - } - case Failure(f) => - logger.error( - s"Failed to get document with id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) - None - } - } - - override def getAsync[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[Option[U]] = { - fromCompletableFuture( - async() - .get( - new GetRequest.Builder() - .index( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ) - ) - .id(id) - .build(), - classOf[JMap[String, Object]] - ) - ).flatMap { - case response if response.found() => - val source = mapper.writeValueAsString(response.source()) - logger.debug(s"Deserializing response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}") - // Deserialize the source string to the expected type - // Note: This assumes that the source is a valid JSON representation of U - // and that the serialization library is capable of handling it. 
- Try(serialization.read[U](source)) match { - case Success(value) => Future.successful(Some(value)) - case Failure(f) => - logger.error( - s"Failed to deserialize response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) - Future.successful(None) - } - case _ => Future.successful(None) - } - Future { - this.get[U](id, index, maybeType) - } - } -} - -trait ElasticsearchClientSearchApi extends SearchApi with ElasticsearchClientCompanion { - override implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = - implicitly[ElasticSearchRequest](sqlSearch).query - - override def search[U]( - jsonQuery: JSONQuery - )(implicit m: Manifest[U], formats: Formats): List[U] = { - import jsonQuery._ - logger.info(s"Searching with query: $query on indices: ${indices.mkString(", ")}") - val response = apply().search( - new SearchRequest.Builder() - .index(indices.asJava) - .withJson( - new StringReader(query) - ) - .build(), - classOf[JMap[String, Object]] - ) - if (response.hits().total().value() > 0) { - response - .hits() - .hits() - .asScala - .flatMap { hit => - val source = mapper.writeValueAsString(hit.source()) - logger.debug(s"Deserializing hit: $source") - Try(serialization.read[U](source)).toOption.orElse { - logger.error( - s"Failed to deserialize hit: $source" - ) - None - } - } - .toList - } else { - List.empty[U] - } - } - - override def searchAsync[U]( - sqlQuery: SQLQuery - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[List[U]] = { - val jsonQuery: JSONQuery = sqlQuery - import jsonQuery._ - fromCompletableFuture( - async() - .search( - new SearchRequest.Builder() - .index(indices.asJava) - .withJson(new StringReader(query)) - .build(), - classOf[JMap[String, Object]] - ) - ).flatMap { - case response if response.hits().total().value() > 0 => - Future.successful( - response - .hits() - .hits() - .asScala - .map { hit => - val source = mapper.writeValueAsString(hit.source()) - logger.debug(s"Deserializing hit: $source") - serialization.read[U](source) - } - .toList - ) - case _ => - logger.warn( - s"No hits found for query: ${sqlQuery.query} on indices: ${indices.mkString(", ")}" - ) - Future.successful(List.empty[U]) - } - } - - override def searchWithInnerHits[U, I](jsonQuery: JSONQuery, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[(U, List[I])] = { - import jsonQuery._ - logger.info(s"Searching with query: $query on indices: ${indices.mkString(", ")}") - val response = apply() - .search( - new SearchRequest.Builder() - .index(indices.asJava) - .withJson( - new StringReader(query) - ) - .build(), - classOf[JMap[String, Object]] - ) - val results = response - .hits() - .hits() - .asScala - .toList - if (results.nonEmpty) { - results.flatMap { hit => - val hitSource = hit.source() - Option(hitSource) - .map(mapper.writeValueAsString) - .flatMap { source => - logger.debug(s"Deserializing hit: $source") - Try(serialization.read[U](source)) match { - case Success(mainObject) => - Some(mainObject) - case Failure(f) => - logger.error( - s"Failed to deserialize hit: $source for query: $query on indices: ${indices.mkString(", ")}", - f - ) - None - } - } - .map { mainObject => - val innerHits = hit - .innerHits() - .asScala - .get(innerField) - .map(_.hits().hits().asScala.toList) - .getOrElse(Nil) - val innerObjects = innerHits.flatMap { innerHit => - val mapper = new JacksonJsonpMapper() - val writer = new StringWriter() - val 
generator = mapper.jsonProvider().createGenerator(writer) - mapper.serialize(innerHit, generator) - generator.close() - val innerSource = writer.toString - logger.debug(s"Processing inner hit: $innerSource") - val json = new JsonParser().parse(innerSource).getAsJsonObject - val gson = new Gson() - Try(serialization.read[I](gson.toJson(json.get("_source")))) match { - case Success(innerObject) => Some(innerObject) - case Failure(f) => - logger.error(s"Failed to deserialize inner hit: $innerSource", f) - None - } - } - (mainObject, innerObjects) - } - } - } else { - logger.warn(s"No hits found for query: $query on indices: ${indices.mkString(", ")}") - List.empty[(U, List[I])] - } - } - - override def multiSearch[U]( - jsonQueries: JSONQueries - )(implicit m: Manifest[U], formats: Formats): List[List[U]] = { - import jsonQueries._ - - val items = queries.map { query => - new RequestItem.Builder() - .header(new MultisearchHeader.Builder().index(query.indices.asJava).build()) - .body(new MultisearchBody.Builder().withJson(new StringReader(query.query)).build()) - .build() - } - - val request = new MsearchRequest.Builder().searches(items.asJava).build() - val responses = apply().msearch(request, classOf[JMap[String, Object]]) - - responses.responses().asScala.toList.map { - case response if response.isFailure => - logger.error(s"Error in multi search: ${response.failure().error().reason()}") - List.empty[U] - - case response => - response - .result() - .hits() - .hits() - .asScala - .toList - .map(hit => serialization.read[U](mapper.writeValueAsString(hit.source()))) - } - } - - override def multiSearchWithInnerHits[U, I](jsonQueries: JSONQueries, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[List[(U, List[I])]] = { - import jsonQueries._ - val items = queries.map { query => - new RequestItem.Builder() - .header(new MultisearchHeader.Builder().index(query.indices.asJava).build()) - .body(new MultisearchBody.Builder().withJson(new StringReader(query.query)).build()) - .build() - } - - val request = new MsearchRequest.Builder().searches(items.asJava).build() - val responses = apply().msearch(request, classOf[JMap[String, Object]]) - - responses.responses().asScala.toList.map { - case response if response.isFailure => - logger.error(s"Error in multi search: ${response.failure().error().reason()}") - List.empty[(U, List[I])] - - case response => - Try( - new JsonParser().parse(response.result().toString).getAsJsonObject ~> [U, I] innerField - ) match { - case Success(s) => s - case Failure(f) => - logger.error(f.getMessage, f) - List.empty - } - } - } - -} - -trait ElasticsearchClientBulkApi - extends ElasticsearchClientRefreshApi - with ElasticsearchClientSettingsApi - with ElasticsearchClientIndicesApi - with BulkApi { - override type A = BulkOperation - override type R = BulkResponse - - override def toBulkAction(bulkItem: BulkItem): A = { - import bulkItem._ - - action match { - case BulkAction.UPDATE => - new BulkOperation.Builder() - .update( - new UpdateOperation.Builder() - .index(index) - .id(id.orNull) - .action( - new UpdateAction.Builder[JMap[String, Object], JMap[String, Object]]() - .doc(mapper.readValue(body, classOf[JMap[String, Object]])) - .docAsUpsert(true) - .build() - ) - .build() - ) - .build() - - case BulkAction.DELETE => - val deleteId = id.getOrElse { - throw new IllegalArgumentException(s"Missing id for delete on index $index") - } - new BulkOperation.Builder() - .delete(new 
DeleteOperation.Builder().index(index).id(deleteId).build()) - .build() - - case _ => - new BulkOperation.Builder() - .index( - new IndexOperation.Builder[JMap[String, Object]]() - .index(index) - .id(id.orNull) - .document(mapper.readValue(body, classOf[JMap[String, Object]])) - .build() - ) - .build() - } - } - override def bulkResult: Flow[R, Set[String], NotUsed] = - Flow[BulkResponse] - .named("result") - .map(result => { - val items = result.items().asScala.toList - val grouped = items.groupBy(_.index()) - val indices = grouped.keys.toSet - for (index <- indices) { - logger - .info(s"Bulk operation succeeded for index $index with ${grouped(index).length} items.") - } - indices - }) - - override def bulk(implicit - bulkOptions: BulkOptions, - system: ActorSystem - ): Flow[Seq[A], R, NotUsed] = { - val parallelism = Math.max(1, bulkOptions.balance) - Flow[Seq[A]] - .named("bulk") - .mapAsyncUnordered[R](parallelism) { items => - val request = - new BulkRequest.Builder().index(bulkOptions.index).operations(items.asJava).build() - Try(apply().bulk(request)) match { - case Success(response) if response.errors() => - val failedItems = response.items().asScala.filter(_.status() >= 400) - if (failedItems.nonEmpty) { - val errorMessages = - failedItems.map(i => s"${i.id()} - ${i.error().reason()}").mkString(", ") - Future.failed(new Exception(s"Bulk operation failed for items: $errorMessages")) - } else { - Future.successful(response) - } - case Success(response) => - Future.successful(response) - case Failure(exception) => - logger.error("Bulk operation failed", exception) - Future.failed(exception) - } - } - } - - private[this] def toBulkElasticResultItem(i: BulkResponseItem): BulkElasticResultItem = - new BulkElasticResultItem { - override def index: String = i.index() - } - - override implicit def toBulkElasticAction(a: BulkOperation): BulkElasticAction = - new BulkElasticAction { - override def index: String = { - a match { - case op if op.isIndex => op.index().index() - case op if op.isDelete => op.delete().index() - case op if op.isUpdate => op.update().index() - case _ => - throw new IllegalArgumentException(s"Unsupported bulk operation type: ${a.getClass}") - } - } - } - - override implicit def toBulkElasticResult(r: BulkResponse): BulkElasticResult = { - new BulkElasticResult { - override def items: List[BulkElasticResultItem] = - r.items().asScala.toList.map(toBulkElasticResultItem) - } - } -} diff --git a/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala new file mode 100644 index 00000000..f61a430f --- /dev/null +++ b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala @@ -0,0 +1,1469 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client.java + +import akka.NotUsed +import akka.actor.ActorSystem +import akka.stream.scaladsl +import akka.stream.scaladsl.{Flow, Source} +import app.softnetwork.elastic.client._ +import app.softnetwork.elastic.client.bulk._ +import app.softnetwork.elastic.client.scroll._ +import app.softnetwork.elastic.sql.bridge._ +import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLSearchRequest} +import app.softnetwork.elastic.client +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticResult, ElasticSuccess} +import co.elastic.clients.elasticsearch._types.{FieldSort, FieldValue, SortOptions, SortOrder, Time} +import co.elastic.clients.elasticsearch.core.bulk.{ + BulkOperation, + DeleteOperation, + IndexOperation, + UpdateAction, + UpdateOperation +} +import co.elastic.clients.elasticsearch.core.msearch.{ + MultisearchBody, + MultisearchHeader, + RequestItem +} +import co.elastic.clients.elasticsearch.core._ +import co.elastic.clients.elasticsearch.core.reindex.{Destination, Source => ESSource} +import co.elastic.clients.elasticsearch.core.search.PointInTimeReference +import co.elastic.clients.elasticsearch.indices.update_aliases.{Action, AddAction, RemoveAction} +import co.elastic.clients.elasticsearch.indices.{ExistsRequest => IndexExistsRequest, _} +import com.google.gson.JsonParser + +import _root_.java.io.{IOException, StringReader} +import _root_.java.util.{Map => JMap} +import scala.jdk.CollectionConverters._ +import scala.concurrent.{ExecutionContext, Future} +import scala.language.implicitConversions +import scala.util.{Failure, Success, Try} + +trait JavaClientApi + extends ElasticClientApi + with JavaClientIndicesApi + with JavaClientAliasApi + with JavaClientSettingsApi + with JavaClientMappingApi + with JavaClientRefreshApi + with JavaClientFlushApi + with JavaClientCountApi + with JavaClientIndexApi + with JavaClientUpdateApi + with JavaClientDeleteApi + with JavaClientGetApi + with JavaClientSearchApi + with JavaClientBulkApi + with JavaClientScrollApi + with JavaClientCompanion + with JavaClientVersionApi + +/** Elasticsearch client implementation using the Java Client + * @see + * [[VersionApi]] for version information + */ +trait JavaClientVersionApi extends VersionApi with JavaClientHelpers { + _: SerializationApi with JavaClientCompanion => + override private[client] def executeVersion(): result.ElasticResult[String] = + executeJavaAction( + operation = "version", + index = None, + retryable = true + )( + apply().info() + ) { response => + response.version().number() + } +} + +/** Elasticsearch client implementation of Indices API using the Java Client + * @see + * [[IndicesApi]] for index management operations + */ +trait JavaClientIndicesApi extends IndicesApi with RefreshApi with JavaClientHelpers { + _: JavaClientCompanion => + override private[client] def executeCreateIndex( + index: String, + settings: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "createIndex", + index = Some(index), + retryable = false + )( + apply() + .indices() + .create( + new CreateIndexRequest.Builder() + .index(index) + .settings(new IndexSettings.Builder().withJson(new StringReader(settings)).build()) + .build() + ) + )(_.acknowledged()) + + override private[client] def executeDeleteIndex(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "deleteIndex", + index = Some(index), + retryable = false + )( + apply() + .indices() + .delete(new 
DeleteIndexRequest.Builder().index(index).build()) + )(_.acknowledged()) + + override private[client] def executeCloseIndex(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "closeIndex", + index = Some(index), + retryable = false + )( + apply() + .indices() + .close(new CloseIndexRequest.Builder().index(index).build()) + )(_.acknowledged()) + + override private[client] def executeOpenIndex(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "openIndex", + index = Some(index), + retryable = false + )( + apply() + .indices() + .open(new OpenRequest.Builder().index(index).build()) + )(_.acknowledged()) + + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): result.ElasticResult[(Boolean, Option[Long])] = + executeJavaAction( + operation = "reindex", + index = Some(s"$sourceIndex -> $targetIndex"), + retryable = false + )( + apply() + .reindex( + new ReindexRequest.Builder() + .source(new ESSource.Builder().index(sourceIndex).build()) + .dest(new Destination.Builder().index(targetIndex).build()) + .refresh(refresh) + .build() + ) + ) { response => + val failures = response.failures().asScala.map(_.cause().reason()) + if (failures.nonEmpty) { + logger.error( + s"Reindexing from $sourceIndex to $targetIndex failed with errors: ${failures.take(10).mkString(", ")}" + ) + } + (failures.isEmpty, Option(response.total())) + } + + override private[client] def executeIndexExists(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "indexExists", + index = Some(index), + retryable = false + )( + apply() + .indices() + .exists( + new IndexExistsRequest.Builder().index(index).build() + ) + )(_.value()) + +} + +/** Elasticsearch client implementation of Alias API using the Java Client + * @see + * [[AliasApi]] for alias management operations + */ +trait JavaClientAliasApi extends AliasApi with JavaClientHelpers { + _: IndicesApi with JavaClientCompanion => + + override private[client] def executeAddAlias( + index: String, + alias: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "addAlias", + index = Some(index), + retryable = false + )( + apply() + .indices() + .updateAliases( + new UpdateAliasesRequest.Builder() + .actions( + new Action.Builder() + .add(new AddAction.Builder().index(index).alias(alias).build()) + .build() + ) + .build() + ) + )(_.acknowledged()) + + override private[client] def executeRemoveAlias( + index: String, + alias: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "removeAlias", + index = Some(index), + retryable = false + )( + apply() + .indices() + .updateAliases( + new UpdateAliasesRequest.Builder() + .actions( + new Action.Builder() + .remove(new RemoveAction.Builder().index(index).alias(alias).build()) + .build() + ) + .build() + ) + )(_.acknowledged()) + + override private[client] def executeAliasExists(alias: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "aliasExists", + index = None, + retryable = false + )( + apply() + .indices() + .existsAlias( + new ExistsAliasRequest.Builder().name(alias).build() + ) + )(_.value()) + + override private[client] def executeGetAliases(index: String): result.ElasticResult[String] = + executeJavaAction( + operation = "getAliases", + index = Some(index), + retryable = false + )( + apply() + .indices() + .getAlias( + new 
GetAliasRequest.Builder().index(index).build() + ) + )(response => convertToJson(response)) + + override private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "swapAlias", + index = Some(s"$oldIndex <-> $newIndex"), + retryable = false + )( + apply() + .indices() + .updateAliases( + new UpdateAliasesRequest.Builder() + .actions( + List( + new Action.Builder() + .remove(new RemoveAction.Builder().index(oldIndex).alias(alias).build()) + .build(), + new Action.Builder() + .add(new AddAction.Builder().index(newIndex).alias(alias).build()) + .build() + ).asJava + ) + .build() + ) + )(_.acknowledged()) + +} + +/** Elasticsearch client implementation of Settings API using the Java Client + * @see + * [[SettingsApi]] for settings management operations + */ +trait JavaClientSettingsApi extends SettingsApi with JavaClientHelpers { + _: IndicesApi with JavaClientCompanion => + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "updateSettings", + index = Some(index), + retryable = false + )( + apply() + .indices() + .putSettings( + new PutIndicesSettingsRequest.Builder() + .index(index) + .settings(new IndexSettings.Builder().withJson(new StringReader(settings)).build()) + .build() + ) + )(_.acknowledged()) + + override private[client] def executeLoadSettings(index: String): result.ElasticResult[String] = + executeJavaAction( + operation = "loadSettings", + index = Some(index), + retryable = true + )( + apply() + .indices() + .getSettings( + new GetIndicesSettingsRequest.Builder().index(index).build() + ) + )(response => convertToJson(response)) + +} + +/** Elasticsearch client implementation of Mapping API using the Java Client + * @see + * [[MappingApi]] for mapping management operations + */ +trait JavaClientMappingApi extends MappingApi with JavaClientHelpers { + _: SettingsApi with IndicesApi with RefreshApi with JavaClientCompanion => + + override private[client] def executeSetMapping( + index: String, + mapping: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "setMapping", + index = Some(index), + retryable = false + )( + apply() + .indices() + .putMapping( + new PutMappingRequest.Builder().index(index).withJson(new StringReader(mapping)).build() + ) + )(_.acknowledged()) + + override private[client] def executeGetMapping(index: String): result.ElasticResult[String] = + executeJavaAction( + operation = "getMapping", + index = Some(index), + retryable = true + )( + apply() + .indices() + .getMapping( + new GetMappingRequest.Builder().index(index).build() + ) + ) { response => + val valueOpt = response.result().asScala.get(index) + valueOpt match { + case Some(value) => convertToJson(value) + case None => """{"properties": {}}""" + } + } + + /** Get the mapping properties of an index. 
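+ * + * A minimal usage sketch, where `client` stands for any concrete client mixing in this trait (the instance and the index name are assumptions of the example, not part of the API): + * {{{ + * client.getMappingProperties("users") match { + * case ElasticSuccess(json) => logger.info(s"mapping properties: $json") + * case ElasticFailure(error) => logger.error(error.message) + * } + * }}}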
+ * + * @param index + * - the name of the index to get the mapping properties for + * @return + * the mapping properties of the index as a JSON string + */ + override def getMappingProperties(index: String): ElasticResult[String] = { + getMapping(index).flatMap { jsonString => + // ✅ Extracting mapping from JSON + ElasticResult.attempt( + new JsonParser().parse(jsonString).getAsJsonObject + ) match { + case ElasticFailure(error) => + logger.error(s"❌ Failed to parse JSON mapping for index '$index': ${error.message}") + return ElasticFailure(error.copy(operation = Some("getMapping"), index = Some(index))) + case ElasticSuccess(indexObj) => + val settingsObj = indexObj + .getAsJsonObject("mappings") + ElasticSuccess(settingsObj.toString) + } + } + } +} + +/** Elasticsearch client implementation of Refresh API using the Java Client + * @see + * [[RefreshApi]] for index refresh operations + */ +trait JavaClientRefreshApi extends RefreshApi with JavaClientHelpers { + _: JavaClientCompanion => + + override private[client] def executeRefresh(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "refresh", + index = Some(index), + retryable = false + )( + apply() + .indices() + .refresh( + new RefreshRequest.Builder().index(index).build() + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + +} + +/** Elasticsearch client implementation of Flush API using the Java Client + * @see + * [[FlushApi]] for index flush operations + */ +trait JavaClientFlushApi extends FlushApi with JavaClientHelpers { + _: JavaClientCompanion => + + override private[client] def executeFlush( + index: String, + force: Boolean, + wait: Boolean + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "flush", + index = Some(index), + retryable = false + )( + apply() + .indices() + .flush( + new FlushRequest.Builder().index(index).force(force).waitIfOngoing(wait).build() + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + +} + +/** Elasticsearch client implementation of Count API using the Java Client + * @see + * [[CountApi]] for count operations + */ +trait JavaClientCountApi extends CountApi with JavaClientHelpers { + _: JavaClientCompanion => + + override private[client] def executeCount( + query: ElasticQuery + ): result.ElasticResult[Option[Double]] = + executeJavaAction( + operation = "count", + index = Some(query.indices.mkString(",")), + retryable = true + )( + apply() + .count( + new CountRequest.Builder().index(query.indices.asJava).build() + ) + ) { response => + Option(response.count().toDouble) + } + + override private[client] def executeCountAsync( + query: ElasticQuery + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[Double]]] = + fromCompletableFuture( + async() + .count( + new CountRequest.Builder().index(query.indices.asJava).build() + ) + ).map { response => + result.ElasticSuccess(Option(response.count().toDouble)) + } + +} + +/** Elasticsearch client implementation of Index API using the Java Client + * @see + * [[IndexApi]] for index operations + */ +trait JavaClientIndexApi extends IndexApi with JavaClientHelpers { + _: RefreshApi with JavaClientCompanion with SerializationApi => + + override private[client] def executeIndex( + index: String, + id: String, + source: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "index", + index = Some(index), + retryable = false + )( + apply() + .index( + new IndexRequest.Builder() + .index(index) + .id(id) + .withJson(new 
StringReader(source)) + .build() + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + + override private[client] def executeIndexAsync(index: String, id: String, source: String)(implicit + ec: ExecutionContext + ): Future[result.ElasticResult[Boolean]] = + fromCompletableFuture( + async() + .index( + new IndexRequest.Builder() + .index(index) + .id(id) + .withJson(new StringReader(source)) + .build() + ) + ).map { response => + if (response.shards().failed().intValue() == 0) { + result.ElasticSuccess(true) + } else { + result.ElasticFailure( + client.result.ElasticError(s"Failed to index document with id: $id in index: $index") + ) + } + } + +} + +/** Elasticsearch client implementation of Update API using the Java Client + * @see + * [[UpdateApi]] for update operations + */ +trait JavaClientUpdateApi extends UpdateApi with JavaClientHelpers { + _: RefreshApi with JavaClientCompanion with SerializationApi => + + override private[client] def executeUpdate( + index: String, + id: String, + source: String, + upsert: Boolean + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "update", + index = Some(index), + retryable = false + )( + apply() + .update( + new UpdateRequest.Builder[JMap[String, Object], JMap[String, Object]]() + .index(index) + .id(id) + .doc(mapper.readValue(source, classOf[JMap[String, Object]])) + .docAsUpsert(upsert) + .build(), + classOf[JMap[String, Object]] + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + + override private[client] def executeUpdateAsync( + index: String, + id: String, + source: String, + upsert: Boolean + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Boolean]] = + fromCompletableFuture( + async() + .update( + new UpdateRequest.Builder[JMap[String, Object], JMap[String, Object]]() + .index(index) + .id(id) + .doc(mapper.readValue(source, classOf[JMap[String, Object]])) + .docAsUpsert(upsert) + .build(), + classOf[JMap[String, Object]] + ) + ).map { response => + if (response.shards().failed().intValue() == 0) { + result.ElasticSuccess(true) + } else { + result.ElasticFailure( + client.result.ElasticError(s"Failed to update document with id: $id in index: $index") + ) + } + } + +} + +/** Elasticsearch client implementation of Delete API using the Java Client + * @see + * [[DeleteApi]] for delete operations + */ +trait JavaClientDeleteApi extends DeleteApi with JavaClientHelpers { + _: RefreshApi with JavaClientCompanion => + + override private[client] def executeDelete( + index: String, + id: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "delete", + index = Some(index), + retryable = false + )( + apply() + .delete( + new DeleteRequest.Builder().index(index).id(id).build() + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + + override private[client] def executeDeleteAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[result.ElasticResult[Boolean]] = + fromCompletableFuture( + async() + .delete( + new DeleteRequest.Builder().index(index).id(id).build() + ) + ).map { response => + if (response.shards().failed().intValue() == 0) { + result.ElasticSuccess(true) + } else { + result.ElasticFailure( + client.result.ElasticError(s"Failed to delete document with id: $id in index: $index") + ) + } + } + +} + +/** Elasticsearch client implementation of Get API using the Java Client + * @see + * [[GetApi]] for get operations + */ +trait JavaClientGetApi extends GetApi with JavaClientHelpers { + _: JavaClientCompanion with SerializationApi 
=> + + override private[client] def executeGet( + index: String, + id: String + ): result.ElasticResult[Option[String]] = + executeJavaAction( + operation = "get", + index = Some(index), + retryable = true + )( + apply() + .get( + new GetRequest.Builder() + .index(index) + .id(id) + .build(), + classOf[JMap[String, Object]] + ) + ) { response => + if (response.found()) { + Some(mapper.writeValueAsString(response.source())) + } else { + None + } + } + + override private[client] def executeGetAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[result.ElasticResult[Option[String]]] = + fromCompletableFuture( + async() + .get( + new GetRequest.Builder() + .index(index) + .id(id) + .build(), + classOf[JMap[String, Object]] + ) + ).map { response => + if (response.found()) { + result.ElasticSuccess(Some(mapper.writeValueAsString(response.source()))) + } else { + result.ElasticSuccess(None) + } + } + +} + +/** Elasticsearch client implementation of Search API using the Java Client + * @see + * [[SearchApi]] for search operations + */ +trait JavaClientSearchApi extends SearchApi with JavaClientHelpers { + _: JavaClientCompanion with SerializationApi => + + override implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = + implicitly[ElasticSearchRequest](sqlSearch).query + + override private[client] def executeSingleSearch( + elasticQuery: ElasticQuery + ): result.ElasticResult[Option[String]] = + executeJavaAction( + operation = "singleSearch", + index = Some(elasticQuery.indices.mkString(",")), + retryable = true + )( + apply() + .search( + new SearchRequest.Builder() + .index(elasticQuery.indices.asJava) + .withJson( + new StringReader(elasticQuery.query) + ) + .build(), + classOf[JMap[String, Object]] + ) + )(resp => Some(convertToJson(resp))) + + override private[client] def executeMultiSearch( + elasticQueries: ElasticQueries + ): result.ElasticResult[Option[String]] = + executeJavaAction( + operation = "multiSearch", + index = Some(elasticQueries.queries.flatMap(_.indices).distinct.mkString(",")), + retryable = true + ) { + val items = elasticQueries.queries.map { q => + new RequestItem.Builder() + .header(new MultisearchHeader.Builder().index(q.indices.asJava).build()) + .body(new MultisearchBody.Builder().withJson(new StringReader(q.query)).build()) + .build() + } + + val request = new MsearchRequest.Builder().searches(items.asJava).build() + apply().msearch(request, classOf[JMap[String, Object]]) + }(resp => Some(convertToJson(resp))) + + override private[client] def executeSingleSearchAsync( + elasticQuery: ElasticQuery + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[String]]] = + fromCompletableFuture( + async() + .search( + new SearchRequest.Builder() + .index(elasticQuery.indices.asJava) + .withJson(new StringReader(elasticQuery.query)) + .build(), + classOf[JMap[String, Object]] + ) + ).map { response => + result.ElasticSuccess(Some(convertToJson(response))) + } + + override private[client] def executeMultiSearchAsync( + elasticQueries: ElasticQueries + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[String]]] = + fromCompletableFuture { + val items = elasticQueries.queries.map { q => + new RequestItem.Builder() + .header(new MultisearchHeader.Builder().index(q.indices.asJava).build()) + .body(new MultisearchBody.Builder().withJson(new StringReader(q.query)).build()) + .build() + } + + val request = new MsearchRequest.Builder().searches(items.asJava).build() + async().msearch(request, 
classOf[JMap[String, Object]]) + } + .map { response => + result.ElasticSuccess(Some(convertToJson(response))) + } + +} + +/** Elasticsearch client implementation of Bulk API using the Java Client + * @see + * [[BulkApi]] for bulk operations + */ +trait JavaClientBulkApi extends BulkApi with JavaClientHelpers { + _: RefreshApi with SettingsApi with IndexApi with JavaClientCompanion => + override type BulkActionType = BulkOperation + override type BulkResultType = BulkResponse + + override implicit private[client] def toBulkElasticAction(a: BulkOperation): BulkElasticAction = + new BulkElasticAction { + override def index: String = { + a match { + case op if op.isIndex => op.index().index() + case op if op.isDelete => op.delete().index() + case op if op.isUpdate => op.update().index() + case _ => + throw new IllegalArgumentException(s"Unsupported bulk operation type: ${a.getClass}") + } + } + } + + /** Basic flow for executing a bulk action. This method must be implemented by concrete classes + * depending on the Elasticsearch version and client used. + * + * @param bulkOptions + * configuration options + * @return + * Flow transforming bulk actions into results + */ + override private[client] def bulkFlow(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Flow[Seq[A], R, NotUsed] = { + val parallelism = Math.max(1, bulkOptions.balance) + Flow[Seq[A]] + .named("bulk") + .mapAsyncUnordered[R](parallelism) { items => + val request = + new BulkRequest.Builder().index(bulkOptions.defaultIndex).operations(items.asJava).build() + Try(apply().bulk(request)) match { + case Success(response) => + if (response.errors()) { + val failedItems = response.items().asScala.filter(_.status() >= 400) + if (failedItems.nonEmpty) { + val errorMessages = + failedItems + .take(10) + .map(i => s"(${i.index()}, ${i.id()}) -> ${i.error().reason()}") + .mkString(", ") + logger.error(s"Bulk operation failed for items: $errorMessages") + } else { + logger.warn("Bulk operation reported errors but no failed items found") + } + } + Future.successful(response) + case Failure(exception) => + logger.error(s"Bulk operation failed : ${exception.getMessage}") + Future.failed(exception) + } + } + } + + /** Convert a BulkResultType into individual results. This method must extract the successes and + * failures from the ES response. 
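+ * + * For illustration only (ids, index names and the error are hypothetical), a three-document batch whose second document hits a non-retryable mapping error would come back roughly as: + * {{{ + * Seq( + * Right(SuccessfulDocument("1", "users")), + * Left(FailedDocument("2", "users", "{...}", BulkError("failed to parse", "mapper_parsing_exception", 400), retryable = false)), + * Right(SuccessfulDocument("3", "users")) + * ) + * }}}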
+ * + * @param result + * raw result from the bulk + * @return + * sequence of Right(id) for success or Left(failed) for failure + */ + override private[client] def extractBulkResults( + result: BulkResponse, + originalBatch: Seq[BulkItem] + ): Seq[Either[FailedDocument, SuccessfulDocument]] = { + // no results at all + if ( + originalBatch.nonEmpty && + (result == null || (result.items() == null || result.items().isEmpty)) + ) { + logger.error("Bulk result is null or has no items") + return originalBatch.map { item => + Left( + FailedDocument( + id = item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = BulkError( + message = "Null bulk result", + `type` = "internal_error", + status = 500 + ), + retryable = false + ) + ) + } + } + + // process failed items + val failedItems = + result + .items() + .asScala + .filter(item => Option(item.error()).isDefined) + .map { item => + val errorStatus = item.status() + val errorType = item.error().`type` + val errorReason = item.error().reason() + + val originalItemOpt = originalBatch.find { originalItem => + originalItem.index == item.index() && originalItem.id.contains(item.id()) + } + + // Determine if the error is retryable + val isRetryable = + originalItemOpt.isDefined && (BulkErrorAnalyzer.isRetryable(errorStatus) || + BulkErrorAnalyzer.isRetryableByType(errorType)) + + val document = originalItemOpt.map(_.document).getOrElse("") + Left( + FailedDocument( + id = item.id(), + index = item.index(), + document = document, + error = BulkError( + message = errorReason, + `type` = errorType, + status = errorStatus + ), + retryable = isRetryable + ) + ) + } + .toSeq + + // process successful items + val successfulItems = result + .items() + .asScala + .filter(item => Option(item.error()).isEmpty) + .map { item => + Right( + SuccessfulDocument( + id = item.id(), + index = item.index() + ) + ) + } + .toSeq + + val results = failedItems ++ successfulItems + + // if no individual results but overall failure, mark all as failed + if (results.isEmpty && originalBatch.nonEmpty) { + logger.error("Bulk operation failed with no individual item results") + return originalBatch.map { item => + Left( + FailedDocument( + id = item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = BulkError( + message = "Bulk operation failed with no individual item results", + `type` = "internal_error", + status = 500 + ), + retryable = false + ) + ) + } + } + + results + } + + override private[client] def toBulkAction(bulkItem: BulkItem): A = { + import bulkItem._ + + action match { + case BulkAction.UPDATE => + new BulkOperation.Builder() + .update( + new UpdateOperation.Builder() + .index(bulkItem.index) + .id(id.orNull) + .action( + new UpdateAction.Builder[JMap[String, Object], JMap[String, Object]]() + .doc(mapper.readValue(document, classOf[JMap[String, Object]])) + .docAsUpsert(true) + .build() + ) + .build() + ) + .build() + + case BulkAction.DELETE => + val deleteId = id.getOrElse { + throw new IllegalArgumentException(s"Missing id for delete on index ${bulkItem.index}") + } + new BulkOperation.Builder() + .delete(new DeleteOperation.Builder().index(bulkItem.index).id(deleteId).build()) + .build() + + case _ => + new BulkOperation.Builder() + .index( + new IndexOperation.Builder[JMap[String, Object]]() + .index(bulkItem.index) + .id(id.orNull) + .document(mapper.readValue(document, classOf[JMap[String, Object]])) + .build() + ) + .build() + } + } + + /** Conversion BulkActionType -> BulkItem */ + 
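Before the reverse conversion `actionToBulkItem` below, here is a hedged sketch of how `toBulkAction` and `bulkFlow` compose into a streaming indexing pipeline. Everything not shown in this diff (`client`, `items`, `myBulkOptions`, the batch size) is an assumption of the example, and the `private[client]` members are called directly purely for illustration:

```scala
import akka.actor.ActorSystem
import akka.stream.scaladsl.{Sink, Source}

// Sketch only: `client` is assumed to be a concrete instance mixing in JavaClientBulkApi.
implicit val system: ActorSystem = ActorSystem("bulk-sketch")
implicit val options: BulkOptions = myBulkOptions // hypothetical, configured elsewhere

val done = Source(items.toList)  // items: Seq[BulkItem], assumed to exist
  .map(client.toBulkAction)      // BulkItem -> BulkOperation
  .grouped(500)                  // batch size chosen arbitrarily for the sketch
  .via(client.bulkFlow)          // one BulkRequest per batch
  .runWith(Sink.ignore)
```

The failure analysis then happens in `extractBulkResults`, which pairs each response item back with its originating `BulkItem`.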
override private[client] def actionToBulkItem(action: BulkActionType): BulkItem = + action match { + case op if op.isIndex => + BulkItem( + index = op.index().index(), + id = Option(op.index().id()), + document = mapper.writeValueAsString(op.index().document()), + action = BulkAction.INDEX, + parent = None + ) + case op if op.isDelete => + BulkItem( + index = op.delete().index(), + id = Some(op.delete().id()), + document = "", + action = BulkAction.DELETE, + parent = None + ) + case op if op.isUpdate => + BulkItem( + index = op.update().index(), + id = Some(op.update().id()), + document = mapper.writeValueAsString(op.update().action().doc()), + action = BulkAction.UPDATE, + parent = None + ) + case _ => + throw new IllegalArgumentException(s"Unsupported bulk operation type: ${action.getClass}") + } + +} + +/** Elasticsearch client implementation of Scroll API using the Java Client + * @see + * [[ScrollApi]] for scroll operations + */ +trait JavaClientScrollApi extends ScrollApi with JavaClientHelpers { + _: VersionApi with SearchApi with JavaClientCompanion => + + /** Classic scroll (works for both hits and aggregations) + */ + override private[client] def scrollClassic( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation], + config: ScrollConfig + )(implicit system: ActorSystem): scaladsl.Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + Source + .unfoldAsync[Option[String], Seq[Map[String, Any]]](None) { scrollIdOpt => + retryWithBackoff(config.retryConfig) { + Future { + scrollIdOpt match { + case None => + // Initial search with scroll + logger.info( + s"Starting classic scroll on indices: ${elasticQuery.indices.mkString(", ")}" + ) + + val searchRequest = new SearchRequest.Builder() + .index(elasticQuery.indices.asJava) + .withJson(new StringReader(elasticQuery.query)) + .scroll(Time.of(t => t.time(config.keepAlive))) + .size(config.scrollSize) + .build() + + val response = apply().search(searchRequest, classOf[JMap[String, Object]]) + + if ( + response.shards() != null && response + .shards() + .failed() != null && response.shards().failed().intValue() > 0 + ) { + val failures = response.shards().failures() + val errorMsg = if (failures != null && !failures.isEmpty) { + failures.asScala.map(_.reason()).mkString("; ") + } else { + "Unknown shard failure" + } + throw new IOException(s"Initial scroll failed: $errorMsg") + } + + val scrollId = response.scrollId() + + if (scrollId == null) { + throw new IllegalStateException("Scroll ID is null in response") + } + + val results = extractAllResults(Left(response), fieldAliases, aggregations) + + if (results.isEmpty || scrollId == null) None + else Some((Some(scrollId), results)) + + case Some(scrollId) => + // Subsequent scroll + logger.debug(s"Fetching next scroll batch (scrollId: $scrollId)") + + val scrollRequest = new ScrollRequest.Builder() + .scrollId(scrollId) + .scroll(Time.of(t => t.time(config.keepAlive))) + .build() + + val response = apply().scroll(scrollRequest, classOf[JMap[String, Object]]) + + if ( + response.shards() != null && response + .shards() + .failed() != null && response.shards().failed().intValue() > 0 + ) { + clearScroll(scrollId) + val failures = response.shards().failures() + val errorMsg = if (failures != null && !failures.isEmpty) { + failures.asScala.map(_.reason()).mkString("; ") + } else { + "Unknown shard failure" + } + throw new IOException(s"Scroll continuation failed: $errorMsg") + } + + val 
newScrollId = response.scrollId() + val results = extractAllResults(Right(response), fieldAliases, aggregations) + + if (results.isEmpty) { + clearScroll(scrollId) + None + } else { + Some((Some(newScrollId), results)) + } + } + } + }(system, logger).recover { case ex: Exception => + logger.error(s"Scroll failed after retries: ${ex.getMessage}", ex) + scrollIdOpt.foreach(clearScroll) + None + } + } + .mapConcat(identity) + } + + /** Search After (only for hits, more efficient) + */ + override private[client] def searchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): scaladsl.Source[Map[String, Any], NotUsed] = { + pitSearchAfter(elasticQuery, fieldAliases, config, hasSorts) + } + + /** PIT + search_after (recommended for ES 7.10+, required for ES 8+) + * + * Advantages: + * - More efficient than classic scroll (stateless) + * - Better for deep pagination + * - Can be parallelized + * - Lower memory footprint on ES cluster + * + * @note + * Only works for hits, not for aggregations (use scrollSourceClassic for aggregations) + */ + private[client] def pitSearchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + + // Step 1: Open PIT + val pitIdFuture: Future[String] = openPit(elasticQuery.indices, config.keepAlive) + + Source + .futureSource { + pitIdFuture.map { pitId => + logger.info(s"Opened PIT: $pitId for indices: ${elasticQuery.indices.mkString(", ")}") + + Source + .unfoldAsync[Option[Seq[Any]], Seq[Map[String, Any]]](None) { searchAfterOpt => + retryWithBackoff(config.retryConfig) { + Future { + searchAfterOpt match { + case None => + logger.info(s"Starting PIT search_after (pitId: ${pitId.take(20)}...)") + case Some(values) => + logger.debug( + s"Fetching next PIT search_after batch (after: ${if (values.length > 3) + s"[${values.take(3).mkString(", ")}...]" + else values.mkString(", ")})" + ) + } + + // Build search request with PIT + val requestBuilder = new SearchRequest.Builder() + .size(config.scrollSize) + .pit( + PointInTimeReference + .of(p => p.id(pitId).keepAlive(Time.of(t => t.time(config.keepAlive)))) + ) + + // Parse query to add query clause (not indices, they're in PIT) + val queryJson = new JsonParser().parse(elasticQuery.query).getAsJsonObject + + // Extract query clause if present + if (queryJson.has("query")) { + requestBuilder.withJson(new StringReader(elasticQuery.query)) + } + + // Check if sorts already exist in the query + if (!hasSorts && !queryJson.has("sort")) { + logger.warn( + "No sort fields in query for PIT search_after, adding default _shard_doc sort. " + + "_shard_doc is more efficient than _id for PIT." 
+ ) + requestBuilder.sort( + SortOptions.of { sortBuilder => + sortBuilder.field( + FieldSort.of(fieldSortBuilder => + fieldSortBuilder.field("_shard_doc").order(SortOrder.Asc) + ) + ) + } + ) + } else if (hasSorts && queryJson.has("sort")) { + // Sorts already present, check that a tie-breaker exists + val existingSorts = queryJson.getAsJsonArray("sort") + val hasShardDocSort = existingSorts.asScala.exists { sortElem => + sortElem.isJsonObject && ( + sortElem.getAsJsonObject.has("_shard_doc") || + sortElem.getAsJsonObject.has("_id") + ) + } + if (!hasShardDocSort) { + // Add _shard_doc as tie-breaker + logger.debug("Adding _shard_doc as tie-breaker to existing sorts") + requestBuilder.sort( + SortOptions.of { sortBuilder => + sortBuilder.field( + FieldSort.of(fieldSortBuilder => + fieldSortBuilder.field("_shard_doc").order(SortOrder.Asc) + ) + ) + } + ) + } + } + + // Add search_after if available + searchAfterOpt.foreach { searchAfter => + val fieldValues: Seq[FieldValue] = searchAfter.map { + case s: String => FieldValue.of(s) + case i: Int => FieldValue.of(i.toLong) + case l: Long => FieldValue.of(l) + case d: Double => FieldValue.of(d) + case b: Boolean => FieldValue.of(b) + case other => FieldValue.of(other.toString) + } + requestBuilder.searchAfter(fieldValues.asJava) + } + + val response = apply().search( + requestBuilder.build(), + classOf[JMap[String, Object]] + ) + + // Check errors + if ( + response.shards() != null && + response.shards().failed() != null && + response.shards().failed().intValue() > 0 + ) { + val failures = response.shards().failures() + val errorMsg = if (failures != null && !failures.isEmpty) { + failures.asScala.map(_.reason()).mkString("; ") + } else { + "Unknown shard failure" + } + throw new IOException(s"PIT search_after failed: $errorMsg") + } + + val hits = extractHitsOnly(response, fieldAliases) + + if (hits.isEmpty) { + // Close PIT when done + closePit(pitId) + None + } else { + val lastHit = response.hits().hits().asScala.lastOption + val nextSearchAfter = lastHit.flatMap { hit => + val sortValues = hit.sort().asScala + if (sortValues.nonEmpty) { + Some(sortValues.map { fieldValue => + if (fieldValue.isString) fieldValue.stringValue() + else if (fieldValue.isDouble) fieldValue.doubleValue() + else if (fieldValue.isLong) fieldValue.longValue() + else if (fieldValue.isBoolean) fieldValue.booleanValue() + else if (fieldValue.isNull) null + else fieldValue.toString + }.toSeq) + } else { + None + } + } + + logger.debug(s"Retrieved ${hits.size} documents, continuing with PIT") + Some((nextSearchAfter, hits)) + } + } + }(system, logger).recover { case ex: Exception => + logger.error(s"PIT search_after failed after retries: ${ex.getMessage}", ex) + closePit(pitId) + None + } + } + .watchTermination() { (_, done) => + // Cleanup PIT on stream completion/failure + done.onComplete { + case scala.util.Success(_) => + logger.info( + s"PIT search_after completed successfully, closing PIT: ${pitId.take(20)}..." + ) + closePit(pitId) + case scala.util.Failure(ex) => + logger.error( + s"PIT search_after failed: ${ex.getMessage}, closing PIT: ${pitId.take(20)}..." 
+ ) + closePit(pitId) + } + NotUsed + } + .mapConcat(identity) + } + } + .mapMaterializedValue(_ => NotUsed) + } + + /** Open a Point In Time + */ + private def openPit(indices: Seq[String], keepAlive: String)(implicit + ec: ExecutionContext + ): Future[String] = { + Future { + logger.debug(s"Opening PIT for indices: ${indices.mkString(", ")} with keepAlive: $keepAlive") + + val openPitRequest = new OpenPointInTimeRequest.Builder() + .index(indices.asJava) + .keepAlive(Time.of(t => t.time(keepAlive))) + .build() + + val response = apply().openPointInTime(openPitRequest) + val pitId = response.id() + + if (pitId == null || pitId.isEmpty) { + throw new IllegalStateException("PIT ID is null or empty in response") + } + + logger.info(s"PIT opened successfully: ${pitId.take(20)}... (keepAlive: $keepAlive)") + pitId + }.recoverWith { case ex: Exception => + logger.error(s"Failed to open PIT: ${ex.getMessage}", ex) + Future.failed( + new IOException(s"Failed to open PIT for indices: ${indices.mkString(", ")}", ex) + ) + } + } + + /** Close a Point In Time + */ + private def closePit(pitId: String): Unit = { + Try { + logger.debug(s"Closing PIT: ${pitId.take(20)}...") + + val closePitRequest = new ClosePointInTimeRequest.Builder() + .id(pitId) + .build() + + val response = apply().closePointInTime(closePitRequest) + + if (response.succeeded()) { + logger.info(s"PIT closed successfully: ${pitId.take(20)}...") + } else { + logger.warn(s"PIT close reported failure: ${pitId.take(20)}...") + } + }.recover { case ex: Exception => + logger.warn(s"Failed to close PIT ${pitId.take(20)}...: ${ex.getMessage}") + } + } + + /** Extract ALL results: hits + aggregations This is crucial for queries with aggregations (GROUP + * BY, COUNT, AVG, etc.) + */ + private def extractAllResults( + response: Either[SearchResponse[JMap[String, Object]], ScrollResponse[JMap[String, Object]]], + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): Seq[Map[String, Any]] = { + val jsonString = + response match { + case Left(l) => convertToJson(l) + case Right(r) => convertToJson(r) + } + val sqlResponse = + ElasticResponse("", jsonString, fieldAliases, aggregations.map(kv => kv._1 -> kv._2)) + + parseResponse(sqlResponse) match { + case Success(rows) => + logger.debug(s"Parsed ${rows.size} rows from response (hits + aggregations)") + rows + case Failure(ex) => + logger.error(s"Failed to parse scroll response: ${ex.getMessage}", ex) + Seq.empty + } + } + + /** Extract ONLY hits (for search_after optimization) Ignores aggregations for better performance + */ + private def extractHitsOnly( + response: SearchResponse[JMap[String, Object]], + fieldAliases: Map[String, String] + ): Seq[Map[String, Any]] = { + val jsonString = convertToJson(response) + val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) + + parseResponse(sqlResponse) match { + case Success(rows) => + logger.debug(s"Parsed ${rows.size} hits from response") + rows + case Failure(ex) => + logger.error(s"Failed to parse search after response: ${ex.getMessage}", ex) + Seq.empty + } + } + + /** Clear scroll context to free resources + */ + private def clearScroll(scrollId: String): Unit = { + Try { + logger.debug(s"Clearing scroll: $scrollId") + val clearRequest = new ClearScrollRequest.Builder() + .scrollId(scrollId) + .build() + apply().clearScroll(clearRequest) + }.recover { case ex: Exception => + logger.warn(s"Failed to clear scroll $scrollId: ${ex.getMessage}") + } + } +} diff --git 
diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientCompanion.scala b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientCompanion.scala
similarity index 53%
rename from es9/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientCompanion.scala
rename to es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientCompanion.scala
index 0fcd0123..0d733502 100644
--- a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientCompanion.scala
+++ b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientCompanion.scala
@@ -16,13 +16,12 @@
 
 package app.softnetwork.elastic.client.java
 
-import app.softnetwork.elastic.client.ElasticConfig
+import app.softnetwork.elastic.client.ElasticClientCompanion
 import co.elastic.clients.elasticsearch.{ElasticsearchAsyncClient, ElasticsearchClient}
 import co.elastic.clients.json.jackson.JacksonJsonpMapper
 import co.elastic.clients.transport.rest_client.RestClientTransport
 import com.fasterxml.jackson.databind.ObjectMapper
 import com.fasterxml.jackson.module.scala.ClassTagExtensions
-import org.apache.http.HttpHost
 import org.apache.http.auth.{AuthScope, UsernamePasswordCredentials}
 import org.apache.http.impl.client.BasicCredentialsProvider
 import org.apache.http.impl.nio.client.HttpAsyncClientBuilder
@@ -32,19 +31,51 @@ import org.slf4j.{Logger, LoggerFactory}
 
 import java.util.concurrent.CompletableFuture
 import scala.concurrent.{Future, Promise}
 
-trait ElasticsearchClientCompanion {
+trait JavaClientCompanion extends ElasticClientCompanion[ElasticsearchClient] {
 
   val logger: Logger = LoggerFactory getLogger getClass.getName
 
-  def elasticConfig: ElasticConfig
+  @volatile private var asyncClient: Option[ElasticsearchAsyncClient] = None
 
-  private var client: Option[ElasticsearchClient] = None
-
-  private var asyncClient: Option[ElasticsearchAsyncClient] = None
+  /** Lock object for synchronized initialization
+    */
+  private val lock = new Object()
 
   lazy val mapper: ObjectMapper with ClassTagExtensions = new ObjectMapper() with ClassTagExtensions
 
-  def transport: RestClientTransport = {
+  def async(): ElasticsearchAsyncClient = {
+    // First check (no locking) - fast path for already initialized client
+    asyncClient match {
+      case Some(c) => c
+      case None =>
+        // Second check with lock - slow path for initialization
+        lock.synchronized {
+          asyncClient match {
+            case Some(c) =>
+              c // Another thread initialized while we were waiting
+            case None =>
+              val c = createAsyncClient()
+              asyncClient = Some(c)
+              logger.info(
+                s"Elasticsearch async client initialized for ${elasticConfig.credentials.url}"
+              )
+              c
+          }
+        }
+    }
+  }
+
+  private def createAsyncClient(): ElasticsearchAsyncClient = {
+    try {
+      new ElasticsearchAsyncClient(buildTransport())
+    } catch {
+      case ex: Exception =>
+        logger.error(s"Failed to create ElasticsearchAsyncClient: ${ex.getMessage}", ex)
+        throw new IllegalStateException("Cannot create Elasticsearch async client", ex)
+    }
+  }
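
Callers of `async()` receive the Java client's `CompletableFuture`-based API. A minimal adaptation sketch (the `infoAsync` wrapper is illustrative, not part of the trait):

```scala
// Sketch: bridge a CompletableFuture-returning call on the async client to a Scala Future.
def infoAsync(): Future[String] = {
  val promise = Promise[String]()
  async()
    .info()
    .whenComplete { (response, error) =>
      if (error != null) promise.failure(error)
      else promise.success(response.version().number())
    }
  promise.future
}
```

The `CompletableFuture`, `Future`, and `Promise` imports retained above presumably exist to support exactly this kind of bridging.
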
+
+  private def buildTransport(): RestClientTransport = {
     val credentialsProvider = new BasicCredentialsProvider()
     if (elasticConfig.credentials.username.nonEmpty) {
       credentialsProvider.setCredentials(
@@ -57,7 +88,7 @@ trait ElasticsearchClientCompanion {
     }
     val restClientBuilder: RestClientBuilder = RestClient
       .builder(
-        HttpHost.create(elasticConfig.credentials.url)
+        parseHttpHost(elasticConfig.credentials.url)
       )
       .setHttpClientConfigCallback((httpAsyncClientBuilder: HttpAsyncClientBuilder) =>
         httpAsyncClientBuilder.setDefaultCredentialsProvider(credentialsProvider)
@@ -65,23 +96,34 @@
     new RestClientTransport(restClientBuilder.build(), new JacksonJsonpMapper())
   }
 
-  def apply(): ElasticsearchClient = {
-    client match {
-      case Some(c) => c
-      case _ =>
-        val c = new ElasticsearchClient(transport)
-        client = Some(c)
-        c
+  /** Create and configure the Elasticsearch client
+    */
+  override protected def createClient(): ElasticsearchClient = {
+    try {
+      new ElasticsearchClient(buildTransport())
+    } catch {
+      case ex: Exception =>
+        logger.error(s"Failed to create ElasticsearchClient: ${ex.getMessage}", ex)
+        throw new IllegalStateException("Cannot create Elasticsearch client", ex)
     }
   }
 
-  def async(): ElasticsearchAsyncClient = {
-    asyncClient match {
-      case Some(c) => c
-      case _ =>
-        val c = new ElasticsearchAsyncClient(transport)
-        asyncClient = Some(c)
-        c
+  /** Test connection to Elasticsearch cluster
+    *
+    * @return
+    *   true if connection is successful
+    */
+  override def testConnection(): Boolean = {
+    try {
+      val c = apply()
+      val response = c.info()
+      logger.info(s"Connected to Elasticsearch ${response.version().number()}")
+      true
+    } catch {
+      case ex: Exception =>
+        logger.error(s"Failed to connect to Elasticsearch: ${ex.getMessage}", ex)
+        incrementFailures()
+        false
+    }
   }
diff --git a/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientConversion.scala b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientConversion.scala
new file mode 100644
index 00000000..20729e6b
--- /dev/null
+++ b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientConversion.scala
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2025 SOFTNETWORK
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package app.softnetwork.elastic.client.java + +import co.elastic.clients.json.JsonpSerializable +import co.elastic.clients.json.jackson.JacksonJsonpMapper + +import java.io.{IOException, StringWriter} +import scala.util.Try + +trait JavaClientConversion { _: JavaClientCompanion => + private[this] val jsonpMapper = new JacksonJsonpMapper(mapper) + + /** Convert any Elasticsearch response to JSON string */ + protected def convertToJson[T <: JsonpSerializable](response: T): String = { + val stringWriter = new StringWriter() + val generator = jsonpMapper.jsonProvider().createGenerator(stringWriter) + try { + response.serialize(generator, jsonpMapper) + generator.flush() + stringWriter.toString + } catch { + case ex: Exception => + logger.error(s"Failed to convert response to JSON: ${ex.getMessage}", ex) + throw new IOException("Failed to serialize Elasticsearch response", ex) + } finally { + Try(generator.close()).failed.foreach { ex => + logger.warn(s"Failed to close JSON generator: ${ex.getMessage}") + } + } + } +} diff --git a/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientHelpers.scala b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientHelpers.scala new file mode 100644 index 00000000..940aa01e --- /dev/null +++ b/es8/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientHelpers.scala @@ -0,0 +1,301 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.java + +import app.softnetwork.elastic.client.ElasticClientHelpers +import app.softnetwork.elastic.client.result.{ElasticError, ElasticResult} + +import scala.util.{Failure, Success, Try} + +trait JavaClientHelpers extends ElasticClientHelpers with JavaClientConversion { + _: JavaClientCompanion => + + // ======================================================================== + // GENERIC METHODS FOR EXECUTING JAVA CLIENT ACTIONS + // ======================================================================== + + //format:off + /** Execute a Java Client action with a generic transformation of the result. 
+ * + * @tparam Resp + * type of the response + * @tparam T + * type of the desired final result + * @param operation + * name of the operation (for logging and error context) + * @param index + * relevant index (optional, for logging) + * @param retryable + * true if the operation can be retried in case of a transient error + * @param action + * function executing the action and returning the response + * @param transformer + * function transforming the response into T + * @return + * ElasticResult[T] + * + * @example + * {{{ + * executeJavaAction[CreateIndexResponse, Boolean]( + * operation = "createIndex", + * index = Some("my-index"), + * retryable = false + * )( + * action = apply().indices().create(builder => builder.index("my-index")) + * )( + * transformer = resp => resp.acknowledged() + * ) + * }}} + */ + //format:on + private[client] def executeJavaAction[Resp, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Resp + )( + transformer: Resp => T + ): ElasticResult[T] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + logger.debug(s"Executing operation '$operation'$indexStr") + + // ✅ Execution with exception handling + val tryResult: Try[Resp] = Try { + action + } + + // ✅ Conversion to ElasticResult[Resp] + val elasticResult: ElasticResult[Resp] = tryResult match { + case Success(result) => + ElasticResult.success(result) + case Failure(ex: co.elastic.clients.elasticsearch._types.ElasticsearchException) => + // Extract error details from Elasticsearch exception + val statusCode = Option(ex.status()).map(_.intValue()) + val errorType = Option(ex.error()).flatMap(e => Option(e.`type`())) + val reason = Option(ex.error()).flatMap(e => Option(e.reason())) + + val message = + s"Elasticsearch error during $operation: ${errorType.getOrElse("unknown")} - ${reason + .getOrElse(ex.getMessage)}" + logger.error(s"$message$indexStr", ex) + + ElasticResult.failure( + ElasticError( + message = message, + cause = Some(ex), + statusCode = statusCode, + operation = Some(operation) + ) + ) + case Failure(ex: java.io.IOException) => + logger.error(s"IO exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + ElasticResult.failure( + ElasticError( + message = s"IO error during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + case Failure(ex) => + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + ElasticResult.failure( + ElasticError( + message = s"Exception during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + + // ✅ Apply transformation + elasticResult.flatMap { result => + Try(transformer(result)) match { + case Success(transformed) => + logger.debug(s"Operation '$operation'$indexStr succeeded") + ElasticResult.success(transformed) + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + ElasticResult.failure( + ElasticError( + message = s"Failed to transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + } + } + + /** Simplified variant for operations returning Boolean values (acknowledged). 
+    *
+    * @param operation
+    *   name of the operation
+    * @param index
+    *   index concerned (optional)
+    * @param retryable
+    *   true if retryable
+    * @param action
+    *   function executing the action
+    * @param acknowledgedExtractor
+    *   function to extract the acknowledged status
+    * @return
+    *   ElasticResult[Boolean]
+    */
+  private[client] def executeJavaBooleanAction[Resp](
+    operation: String,
+    index: Option[String] = None,
+    retryable: Boolean = true
+  )(
+    action: => Resp
+  )(
+    acknowledgedExtractor: Resp => Boolean
+  ): ElasticResult[Boolean] = {
+    executeJavaAction[Resp, Boolean](operation, index, retryable)(action)(acknowledgedExtractor)
+  }
+
+  //format:off
+  /** Variant to execute an action and extract a specific field from the response.
+    *
+    * @tparam Resp
+    *   type of the response
+    * @tparam T
+    *   type of the final result
+    * @param operation
+    *   name of the operation
+    * @param index
+    *   index concerned (optional)
+    * @param retryable
+    *   true if retryable
+    * @param action
+    *   function executing the action
+    * @param extractor
+    *   function extracting T from the response
+    * @return
+    *   ElasticResult[T]
+    *
+    * @example
+    *   {{{
+    *   executeJavaWithExtractor[CountResponse, Long](
+    *     operation = "countDocuments",
+    *     index = Some("my-index")
+    *   )(
+    *     action = apply().count(builder => builder.index("my-index"))
+    *   )(
+    *     extractor = resp => resp.count()
+    *   )
+    *   }}}
+    */
+  //format:on
+  private[client] def executeJavaWithExtractor[Resp, T](
+    operation: String,
+    index: Option[String] = None,
+    retryable: Boolean = true
+  )(
+    action: => Resp
+  )(
+    extractor: Resp => T
+  ): ElasticResult[T] = {
+    executeJavaAction[Resp, T](operation, index, retryable)(action)(extractor)
+  }
+
+  //format:off
+  /** Asynchronous variant to execute a Java Client action. Note: this helper runs the
+    * synchronous call inside a Future rather than going through the ElasticsearchAsyncClient.
+    *
+    * @tparam Resp
+    *   type of the response
+    * @tparam T
+    *   type of the desired final result
+    * @param operation
+    *   name of the operation
+    * @param index
+    *   relevant index (optional)
+    * @param retryable
+    *   true if retryable
+    * @param action
+    *   function executing the action
+    * @param transformer
+    *   function transforming the response into T
+    * @return
+    *   Future[ElasticResult[T]]
+    *
+    * @example
+    *   {{{
+    *   executeAsyncJavaAction[IndexResponse, String](
+    *     operation = "indexDocument",
+    *     index = Some("my-index")
+    *   )(
+    *     action = apply().index(builder => builder.index("my-index").document(doc))
+    *   )(
+    *     transformer = resp => resp.id()
+    *   )
+    *   }}}
+    */
+  //format:on
+  private[client] def executeAsyncJavaAction[Resp, T](
+    operation: String,
+    index: Option[String] = None,
+    retryable: Boolean = true
+  )(
+    action: => Resp
+  )(
+    transformer: Resp => T
+  )(implicit ec: scala.concurrent.ExecutionContext): scala.concurrent.Future[ElasticResult[T]] = {
+    val indexStr = index.map(i => s" on index '$i'").getOrElse("")
+    logger.debug(s"Executing operation '$operation'$indexStr asynchronously")
+
+    scala.concurrent.Future {
+      executeJavaAction[Resp, T](operation, index, retryable)(action)(transformer)
+    }
+  }
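
Because every helper returns `ElasticResult`, multi-step setups compose with `flatMap`. A small sketch under assumed names (the `people` index is hypothetical, and the `ElasticSuccess`/`ElasticFailure` extractors are assumed to expose the value and the `ElasticError` respectively):

```scala
import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticSuccess}
import co.elastic.clients.elasticsearch.indices.{CreateIndexRequest, RefreshRequest}

// Sketch: chain two helper-based operations and fold the outcome.
val ready: ElasticResult[Boolean] =
  executeJavaBooleanAction("createIndex", Some("people"), retryable = false)(
    apply().indices().create(new CreateIndexRequest.Builder().index("people").build())
  )(_.acknowledged()).flatMap { _ =>
    executeJavaBooleanAction("refresh", Some("people"))(
      apply().indices().refresh(new RefreshRequest.Builder().index("people").build())
    )(_.shards().failed().intValue() == 0)
  }

ready match {
  case ElasticSuccess(ok)    => logger.info(s"'people' index ready: $ok")
  case ElasticFailure(error) => logger.error(s"setup failed: ${error.message}")
}
```
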
+
+  /** Simplified asynchronous variant for operations returning Boolean values.
+    *
+    * @param operation
+    *   name of the operation
+    * @param index
+    *   index concerned (optional)
+    * @param retryable
+    *   true if retryable
+    * @param action
+    *   function executing the action
+    * @param acknowledgedExtractor
+    *   function to extract the acknowledged status
+    * @return
+    *   Future[ElasticResult[Boolean]]
+    */
+  private[client] def executeAsyncJavaBooleanAction[Resp](
+    operation: String,
+    index: Option[String] = None,
+    retryable: Boolean = true
+  )(
+    action: => Resp
+  )(
+    acknowledgedExtractor: Resp => Boolean
+  )(implicit
+    ec: scala.concurrent.ExecutionContext
+  ): scala.concurrent.Future[ElasticResult[Boolean]] = {
+    executeAsyncJavaAction[Resp, Boolean](operation, index, retryable)(action)(
+      acknowledgedExtractor
+    )
+  }
+
+}
diff --git a/es8/java/src/main/scala/app/softnetwork/elastic/client/spi/JavaClientSpi.scala b/es8/java/src/main/scala/app/softnetwork/elastic/client/spi/JavaClientSpi.scala
new file mode 100644
index 00000000..9b1657b4
--- /dev/null
+++ b/es8/java/src/main/scala/app/softnetwork/elastic/client/spi/JavaClientSpi.scala
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2025 SOFTNETWORK
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package app.softnetwork.elastic.client.spi
+
+import app.softnetwork.elastic.client.ElasticClientApi
+import app.softnetwork.elastic.client.java.JavaClientApi
+import com.typesafe.config.Config
+
+class JavaClientSpi extends ElasticClientSpi {
+
+  //format:off
+  /** Creates an Elasticsearch client instance.
+    *
+    * @param conf
+    *   Typesafe configuration containing Elasticsearch parameters
+    * @return
+    *   Configured ElasticClientApi instance
+    * @example
+    *   {{{
+    *   class MyElasticClientProvider extends ElasticClientSpi {
+    *     override def client(config: Config): ElasticClientApi = {
+    *       new MyElasticClientImpl(config)
+    *     }
+    *   }
+    *   }}}
+    */
+  //format:on
+  override def client(conf: Config): ElasticClientApi =
+    new JavaClientApi {
+      override def config: Config = conf
+    }
+}
diff --git a/es8/java/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithJavaProvider.scala b/es8/java/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithJavaProvider.scala
deleted file mode 100644
index 2e2add10..00000000
--- a/es8/java/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithJavaProvider.scala
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright 2025 SOFTNETWORK
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package app.softnetwork.elastic.persistence.query - -import app.softnetwork.persistence.message.CrudEvent -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.persistence.query.{JournalProvider, OffsetProvider} - -trait State2ElasticProcessorStreamWithJavaProvider[T <: Timestamped, E <: CrudEvent] - extends State2ElasticProcessorStream[T, E] - with ElasticsearchClientProvider[T] { _: JournalProvider with OffsetProvider => } diff --git a/es8/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchClientSpec.scala b/es8/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchClientSpec.scala deleted file mode 100644 index e0165b77..00000000 --- a/es8/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchClientSpec.scala +++ /dev/null @@ -1,27 +0,0 @@ -package app.softnetwork.elastic.client - -import app.softnetwork.elastic.client.ElasticsearchProviders.{ - BinaryProvider, - ParentProvider, - PersonProvider, - SampleProvider -} -import app.softnetwork.elastic.model.{Binary, Parent, Sample} -import app.softnetwork.elastic.persistence.query.ElasticProvider -import app.softnetwork.persistence.person.model.Person - -class ElasticsearchClientSpec extends ElasticClientSpec { - - lazy val pClient: ElasticProvider[Person] with ElasticClientApi = new PersonProvider( - elasticConfig - ) - lazy val sClient: ElasticProvider[Sample] with ElasticClientApi = new SampleProvider( - elasticConfig - ) - lazy val bClient: ElasticProvider[Binary] with ElasticClientApi = new BinaryProvider( - elasticConfig - ) - lazy val parentClient: ElasticProvider[Parent] with ElasticClientApi = new ParentProvider( - elasticConfig - ) -} diff --git a/es8/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchProviders.scala b/es8/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchProviders.scala deleted file mode 100644 index 754a3416..00000000 --- a/es8/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchProviders.scala +++ /dev/null @@ -1,51 +0,0 @@ -package app.softnetwork.elastic.client - -import app.softnetwork.elastic.model.{Binary, Parent, Sample} -import app.softnetwork.elastic.persistence.query.ElasticsearchClientProvider -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.person.model.Person -import co.elastic.clients.elasticsearch.ElasticsearchClient -import com.typesafe.config.Config - -object ElasticsearchProviders { - - class PersonProvider(es: Config) - extends ElasticsearchClientProvider[Person] - with ManifestWrapper[Person] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val elasticsearchClient: ElasticsearchClient = apply() - } - - class SampleProvider(es: Config) - extends ElasticsearchClientProvider[Sample] - with ManifestWrapper[Sample] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val elasticsearchClient: ElasticsearchClient = apply() - } - - class BinaryProvider(es: Config) - extends ElasticsearchClientProvider[Binary] - with ManifestWrapper[Binary] { - override protected val manifestWrapper: ManifestW = ManifestW() - - override lazy val config: Config = es - - implicit lazy val elasticsearchClient: ElasticsearchClient = apply() - } - - class ParentProvider(es: Config) - extends ElasticsearchClientProvider[Parent] - with ManifestWrapper[Parent] { - override protected val manifestWrapper: ManifestW = ManifestW() - - 
override lazy val config: Config = es
-
-    implicit lazy val elasticsearchClient: ElasticsearchClient = apply()
-  }
-}
diff --git a/es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientCompanionSpec.scala b/es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientCompanionSpec.scala
new file mode 100644
index 00000000..6d88d236
--- /dev/null
+++ b/es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientCompanionSpec.scala
@@ -0,0 +1,122 @@
+package app.softnetwork.elastic.client
+
+import akka.actor.ActorSystem
+import app.softnetwork.elastic.client.java.JavaClientCompanion
+import app.softnetwork.elastic.scalatest.ElasticDockerTestKit
+import app.softnetwork.persistence.generateUUID
+import com.typesafe.config.ConfigFactory
+import configs.ConfigReader
+import org.scalatest.concurrent.ScalaFutures
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+import org.slf4j.{Logger, LoggerFactory}
+
+import _root_.java.util.concurrent.TimeUnit
+import scala.concurrent.duration.Duration
+import scala.concurrent.{Await, ExecutionContextExecutor, Future}
+import scala.util.Try
+
+class JavaClientCompanionSpec
+    extends AnyWordSpec
+    with ElasticDockerTestKit
+    with Matchers
+    with ScalaFutures {
+
+  lazy val log: Logger = LoggerFactory getLogger getClass.getName
+
+  implicit val system: ActorSystem = ActorSystem(generateUUID())
+
+  implicit val executionContext: ExecutionContextExecutor = system.dispatcher
+
+  override def afterAll(): Unit = {
+    Await.result(system.terminate(), Duration(30, TimeUnit.SECONDS))
+    super.afterAll()
+  }
+
+  "JavaClientCompanion" should {
+
+    "initialize client lazily" in {
+      val companion = TestCompanion()
+      companion.isInitialized shouldBe false
+
+      val client = companion.apply()
+      client should not be null
+      companion.isInitialized shouldBe true
+    }
+
+    "return same instance on multiple calls" in {
+      val companion = TestCompanion()
+      val client1 = companion.apply()
+      val client2 = companion.apply()
+
+      client1 should be theSameInstanceAs client2
+    }
+
+    "be thread-safe during initialization" in {
+      val companion = TestCompanion()
+      val futures = (1 to 100).map { _ =>
+        Future {
+          companion.apply()
+        }
+      }
+
+      val clients = Future.sequence(futures).futureValue
+
+      // All clients must be the same instance
+      clients.distinct.size shouldBe 1
+    }
+
+    "close client properly" in {
+      val companion = TestCompanion()
+      companion.apply()
+      companion.isInitialized shouldBe true
+
+      companion.close()
+      companion.isInitialized shouldBe false
+    }
+
+    "handle invalid URL gracefully" in {
+      val companion = TestCompanion("invalid-url")
+
+      an[IllegalArgumentException] should be thrownBy {
+        companion.apply()
+      }
+    }
+
+    "test connection successfully" in {
+      val companion = TestCompanion()
+      companion.testConnection() shouldBe true
+    }
+  }
+
+  case class TestCompanion(config: ElasticConfig) extends JavaClientCompanion {
+    override def elasticConfig: ElasticConfig = config
+  }
+
+  object TestCompanion {
+    def apply(): TestCompanion = TestCompanion(
+      ConfigReader[ElasticConfig]
+        .read(elasticConfig.withFallback(ConfigFactory.load("softnetwork-elastic.conf")), "elastic")
+        .toEither match {
+        case Left(configError) =>
+          throw configError.configException
+        case Right(r) => r
+      }
+    )
+
+    def apply(url: String): TestCompanion = TestCompanion(
+      ConfigReader[ElasticConfig]
+        .read(
+          ConfigFactory
+            .parseString(elasticConfigAsString)
+            .withFallback(ConfigFactory.load("softnetwork-elastic.conf")),
"elastic" + ) + .toEither match { + case Left(configError) => + throw configError.configException + case Right(r) => r.copy(credentials = ElasticCredentials(url)) + } + ) + } +} diff --git a/es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientSpec.scala b/es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientSpec.scala new file mode 100644 index 00000000..cf255fb5 --- /dev/null +++ b/es8/java/src/test/scala/app/softnetwork/elastic/client/JavaClientSpec.scala @@ -0,0 +1,3 @@ +package app.softnetwork.elastic.client + +class JavaClientSpec extends ElasticClientSpec diff --git a/es8/java/src/test/scala/app/softnetwork/elastic/persistence/person/ElasticsearchClientPersonHandlerSpec.scala b/es8/java/src/test/scala/app/softnetwork/elastic/persistence/person/ElasticsearchClientPersonHandlerSpec.scala deleted file mode 100644 index 5187bed3..00000000 --- a/es8/java/src/test/scala/app/softnetwork/elastic/persistence/person/ElasticsearchClientPersonHandlerSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package app.softnetwork.elastic.persistence.person - -import akka.actor.typed.ActorSystem -import app.softnetwork.elastic.client.java.ElasticsearchClientApi -import app.softnetwork.elastic.persistence.person.ElasticPersonTestKit -import app.softnetwork.elastic.persistence.query.{ElasticProvider, PersonToElasticProcessorStream} -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.person.model.Person -import app.softnetwork.persistence.person.query.PersonToExternalProcessorStream -import app.softnetwork.persistence.query.ExternalPersistenceProvider -import com.typesafe.config.Config -import org.slf4j.{Logger, LoggerFactory} - -import scala.concurrent.ExecutionContextExecutor - -class ElasticsearchClientPersonHandlerSpec extends ElasticPersonTestKit { - - implicit val ec: ExecutionContextExecutor = typedSystem().executionContext - - override def externalPersistenceProvider: ExternalPersistenceProvider[Person] = - new ElasticProvider[Person] with ElasticsearchClientApi with ManifestWrapper[Person] { - override protected val manifestWrapper: ManifestW = ManifestW() - override lazy val config: Config = ElasticsearchClientPersonHandlerSpec.this.elasticConfig - } - - override def person2ExternalProcessorStream: ActorSystem[_] => PersonToExternalProcessorStream = - sys => - new PersonToElasticProcessorStream with ElasticsearchClientApi { - override val forTests: Boolean = true - override protected val manifestWrapper: ManifestW = ManifestW() - override implicit def system: ActorSystem[_] = sys - override def log: Logger = LoggerFactory getLogger getClass.getName - override lazy val config: Config = ElasticsearchClientPersonHandlerSpec.this.elasticConfig - } - - override def log: Logger = LoggerFactory getLogger getClass.getName -} diff --git a/es8/java/src/test/scala/app/softnetwork/elastic/persistence/person/JavaClientPersonHandlerSpec.scala b/es8/java/src/test/scala/app/softnetwork/elastic/persistence/person/JavaClientPersonHandlerSpec.scala new file mode 100644 index 00000000..91651d91 --- /dev/null +++ b/es8/java/src/test/scala/app/softnetwork/elastic/persistence/person/JavaClientPersonHandlerSpec.scala @@ -0,0 +1,3 @@ +package app.softnetwork.elastic.persistence.person + +class JavaClientPersonHandlerSpec extends ElasticClientPersonHandlerSpec diff --git a/es9/java/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi b/es9/java/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi 
new file mode 100644 index 00000000..e4b5ab16 --- /dev/null +++ b/es9/java/src/main/resources/META-INF/services/app.softnetwork.elastic.client.spi.ElasticClientSpi @@ -0,0 +1 @@ +app.softnetwork.elastic.client.spi.JavaClientSpi \ No newline at end of file diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientApi.scala b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientApi.scala deleted file mode 100644 index c0ad301c..00000000 --- a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientApi.scala +++ /dev/null @@ -1,1012 +0,0 @@ -/* - * Copyright 2025 SOFTNETWORK - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package app.softnetwork.elastic.client.java - -import akka.NotUsed -import akka.actor.ActorSystem -import akka.stream.scaladsl.Flow -import app.softnetwork.elastic.client._ -import app.softnetwork.elastic.sql.bridge._ -import app.softnetwork.elastic.sql.query.{SQLQuery, SQLSearchRequest} -import app.softnetwork.elastic.{client, sql} -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.serialization.serialization -import co.elastic.clients.elasticsearch.core.bulk.{ - BulkOperation, - BulkResponseItem, - DeleteOperation, - IndexOperation, - UpdateAction, - UpdateOperation -} -import co.elastic.clients.elasticsearch.core.msearch.{MultisearchHeader, RequestItem} -import co.elastic.clients.elasticsearch.core._ -import co.elastic.clients.elasticsearch.core.reindex.{Destination, Source} -import co.elastic.clients.elasticsearch.core.search.SearchRequestBody -import co.elastic.clients.elasticsearch.indices.update_aliases.{Action, AddAction, RemoveAction} -import co.elastic.clients.elasticsearch.indices.{ExistsRequest => IndexExistsRequest, _} -import co.elastic.clients.json.jackson.JacksonJsonpMapper -import com.google.gson.{Gson, JsonParser} - -import _root_.java.io.{StringReader, StringWriter} -import _root_.java.util.{Map => JMap} -//import scala.jdk.CollectionConverters._ -import scala.collection.JavaConverters._ -import org.json4s.Formats - -import scala.concurrent.{ExecutionContext, Future, Promise} -import scala.language.implicitConversions -import scala.util.{Failure, Success, Try} - -trait ElasticsearchClientApi - extends ElasticClientApi - with ElasticsearchClientIndicesApi - with ElasticsearchClientAliasApi - with ElasticsearchClientSettingsApi - with ElasticsearchClientMappingApi - with ElasticsearchClientRefreshApi - with ElasticsearchClientFlushApi - with ElasticsearchClientCountApi - with ElasticsearchClientSingleValueAggregateApi - with ElasticsearchClientIndexApi - with ElasticsearchClientUpdateApi - with ElasticsearchClientDeleteApi - with ElasticsearchClientGetApi - with ElasticsearchClientSearchApi - with ElasticsearchClientBulkApi - -trait ElasticsearchClientIndicesApi extends IndicesApi with ElasticsearchClientCompanion { - override def createIndex(index: String, settings: String): Boolean = { - tryOrElse( - apply() - .indices() - .create( 
- new CreateIndexRequest.Builder() - .index(index) - .settings(new IndexSettings.Builder().withJson(new StringReader(settings)).build()) - .build() - ) - .acknowledged(), - false - )(logger) - } - - override def deleteIndex(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .delete(new DeleteIndexRequest.Builder().index(index).build()) - .acknowledged(), - false - )(logger) - } - - override def openIndex(index: String): Boolean = { - tryOrElse( - apply().indices().open(new OpenRequest.Builder().index(index).build()).acknowledged(), - false - )(logger) - } - - override def closeIndex(index: String): Boolean = { - tryOrElse( - apply().indices().close(new CloseIndexRequest.Builder().index(index).build()).acknowledged(), - false - )(logger) - } - - override def reindex( - sourceIndex: String, - targetIndex: String, - refresh: Boolean = true - ): Boolean = { - val failures = apply() - .reindex( - new ReindexRequest.Builder() - .source(new Source.Builder().index(sourceIndex).build()) - .dest(new Destination.Builder().index(targetIndex).build()) - .refresh(refresh) - .build() - ) - .failures() - .asScala - .map(_.cause().reason()) - if (failures.nonEmpty) { - logger.error( - s"Reindexing from $sourceIndex to $targetIndex failed with errors: ${failures.take(100).mkString(", ")}" - ) - } - failures.isEmpty - } - - override def indexExists(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .exists( - new IndexExistsRequest.Builder().index(index).build() - ) - .value(), - false - )(logger) - } -} - -trait ElasticsearchClientAliasApi extends AliasApi with ElasticsearchClientCompanion { - override def addAlias(index: String, alias: String): Boolean = { - tryOrElse( - apply() - .indices() - .updateAliases( - new UpdateAliasesRequest.Builder() - .actions( - new Action.Builder() - .add(new AddAction.Builder().index(index).alias(alias).build()) - .build() - ) - .build() - ) - .acknowledged(), - false - )(logger) - } - - override def removeAlias(index: String, alias: String): Boolean = { - tryOrElse( - apply() - .indices() - .updateAliases( - new UpdateAliasesRequest.Builder() - .actions( - new Action.Builder() - .remove(new RemoveAction.Builder().index(index).alias(alias).build()) - .build() - ) - .build() - ) - .acknowledged(), - false - )(logger) - } -} - -trait ElasticsearchClientSettingsApi extends SettingsApi with ElasticsearchClientCompanion { - _: ElasticsearchClientIndicesApi => - - override def updateSettings(index: String, settings: String): Boolean = { - tryOrElse( - apply() - .indices() - .putSettings( - new PutIndicesSettingsRequest.Builder() - .index(index) - .settings(new IndexSettings.Builder().withJson(new StringReader(settings)).build()) - .build() - ) - .acknowledged(), - false - )(logger) - } - - override def loadSettings(index: String): String = { - tryOrElse( - Option( - apply() - .indices() - .getSettings( - new GetIndicesSettingsRequest.Builder().index(index).build() - ) - .get(index) - ).map { value => - val mapper = new JacksonJsonpMapper() - val writer = new StringWriter() - val generator = mapper.jsonProvider().createGenerator(writer) - mapper.serialize(value.settings().index(), generator) - generator.close() - writer.toString - }, - None - )(logger).getOrElse("{}") - } -} - -trait ElasticsearchClientMappingApi - extends MappingApi - with ElasticsearchClientIndicesApi - with ElasticsearchClientRefreshApi - with ElasticsearchClientCompanion { - override def setMapping(index: String, mapping: String): Boolean = { - tryOrElse( - apply() - .indices() - 
.putMapping( - new PutMappingRequest.Builder().index(index).withJson(new StringReader(mapping)).build() - ) - .acknowledged(), - false - )(logger) - } - - override def getMapping(index: String): String = { - tryOrElse( - { - Option( - apply() - .indices() - .getMapping( - new GetMappingRequest.Builder().index(index).build() - ) - .get(index) - ).map { value => - val mapper = new JacksonJsonpMapper() - val writer = new StringWriter() - val generator = mapper.jsonProvider().createGenerator(writer) - mapper.serialize(value, generator) - generator.close() - writer.toString - } - }, - None - )(logger).getOrElse(s""""{$index: {"mappings": {}}}""") - } -} - -trait ElasticsearchClientRefreshApi extends RefreshApi with ElasticsearchClientCompanion { - override def refresh(index: String): Boolean = { - tryOrElse( - apply() - .indices() - .refresh( - new RefreshRequest.Builder().index(index).build() - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } -} - -trait ElasticsearchClientFlushApi extends FlushApi with ElasticsearchClientCompanion { - override def flush(index: String, force: Boolean = true, wait: Boolean = true): Boolean = { - tryOrElse( - apply() - .indices() - .flush( - new FlushRequest.Builder().index(index).force(force).waitIfOngoing(wait).build() - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } -} - -trait ElasticsearchClientCountApi extends CountApi with ElasticsearchClientCompanion { - override def count(query: client.JSONQuery): Option[Double] = { - tryOrElse( - Option( - apply() - .count( - new CountRequest.Builder().index(query.indices.asJava).build() - ) - .count() - .toDouble - ), - None - )(logger) - } - - override def countAsync(query: client.JSONQuery)(implicit - ec: ExecutionContext - ): Future[Option[Double]] = { - fromCompletableFuture( - async() - .count( - new CountRequest.Builder().index(query.indices.asJava).build() - ) - ).map(response => Option(response.count().toDouble)) - } -} - -trait ElasticsearchClientSingleValueAggregateApi - extends SingleValueAggregateApi - with ElasticsearchClientCountApi { - private[this] def aggregateValue(value: Double, valueAsString: String): AggregateValue = - if (valueAsString.nonEmpty) StringValue(valueAsString) - else NumericValue(value) - - override def aggregate( - sqlQuery: SQLQuery - )(implicit ec: ExecutionContext): Future[Seq[SingleValueAggregateResult]] = { - val aggregations: Seq[ElasticAggregation] = sqlQuery - val futures = for (aggregation <- aggregations) yield { - val promise: Promise[SingleValueAggregateResult] = Promise() - val field = aggregation.field - val sourceField = aggregation.sourceField - val aggType = aggregation.aggType - val aggName = aggregation.aggName - val query = aggregation.query.getOrElse("") - val sources = aggregation.sources - sourceField match { - case "_id" if aggType.sql == "count" => - countAsync( - JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ) - ).onComplete { - case Success(result) => - promise.success( - SingleValueAggregateResult( - field, - aggType, - NumericValue(result.getOrElse(0d)), - None - ) - ) - case Failure(f) => - logger.error(f.getMessage, f.fillInStackTrace()) - promise.success( - SingleValueAggregateResult(field, aggType, EmptyValue, Some(f.getMessage)) - ) - } - promise.future - case _ => - val jsonQuery = JSONQuery( - query, - collection.immutable.Seq(sources: _*), - collection.immutable.Seq.empty[String] - ) - import jsonQuery._ - logger.info( - s"Aggregating with query: 
${jsonQuery.query} on indices: ${indices.mkString(", ")}" - ) - // Create a parser for the query - Try( - apply().search( - new SearchRequest.Builder() - .index(indices.asJava) - .withJson( - new StringReader(jsonQuery.query) - ) - .build() - ) - ) match { - case Success(response) => - logger.debug( - s"Aggregation response: ${response.toString}" - ) - val agg = aggName.split("\\.").last - - val itAgg = aggName.split("\\.").iterator - - var root = - if (aggregation.nested) { - response.aggregations().get(itAgg.next()).nested().aggregations() - } else { - response.aggregations() - } - - if (aggregation.filtered) { - root = root.get(itAgg.next()).filter().aggregations() - } - - promise.success( - SingleValueAggregateResult( - field, - aggType, - aggType match { - case sql.function.aggregate.COUNT => - NumericValue( - if (aggregation.distinct) { - root.get(agg).cardinality().value().toDouble - } else { - root.get(agg).valueCount().value() - } - ) - case sql.function.aggregate.SUM => - NumericValue(root.get(agg).sum().value()) - case sql.function.aggregate.AVG => - val avgAgg = root.get(agg).avg() - aggregateValue(avgAgg.value(), avgAgg.valueAsString()) - case sql.function.aggregate.MIN => - val minAgg = root.get(agg).min() - aggregateValue(minAgg.value(), minAgg.valueAsString()) - case sql.function.aggregate.MAX => - val maxAgg = root.get(agg).max() - aggregateValue(maxAgg.value(), maxAgg.valueAsString()) - case _ => EmptyValue - }, - None - ) - ) - case Failure(exception) => - logger.error(s"Failed to execute search for aggregation: $aggName", exception) - promise.success( - SingleValueAggregateResult( - field, - aggType, - EmptyValue, - Some(exception.getMessage) - ) - ) - } - promise.future - } - } - Future.sequence(futures) - } -} - -trait ElasticsearchClientIndexApi extends IndexApi with ElasticsearchClientCompanion { - _: ElasticsearchClientRefreshApi => - override def index(index: String, id: String, source: String): Boolean = { - tryOrElse( - apply() - .index( - new IndexRequest.Builder() - .index(index) - .id(id) - .withJson(new StringReader(source)) - .build() - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } - - override def indexAsync(index: String, id: String, source: String)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - fromCompletableFuture( - async() - .index( - new IndexRequest.Builder() - .index(index) - .id(id) - .withJson(new StringReader(source)) - .build() - ) - ).flatMap { response => - if (response.shards().failed().intValue() == 0) { - Future.successful(true) - } else { - Future.failed(new Exception(s"Failed to index document with id: $id in index: $index")) - } - } - } -} - -trait ElasticsearchClientUpdateApi extends UpdateApi with ElasticsearchClientCompanion { - _: ElasticsearchClientRefreshApi => - override def update( - index: String, - id: String, - source: String, - upsert: Boolean - ): Boolean = { - tryOrElse( - apply() - .update( - new UpdateRequest.Builder[JMap[String, Object], JMap[String, Object]]() - .index(index) - .id(id) - .doc(mapper.readValue(source, classOf[JMap[String, Object]])) - .docAsUpsert(upsert) - .build(), - classOf[JMap[String, Object]] - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } - - override def updateAsync(index: String, id: String, source: String, upsert: Boolean)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - fromCompletableFuture( - async() - .update( - new UpdateRequest.Builder[JMap[String, Object], JMap[String, Object]]() - .index(index) - .id(id) - 
.doc(mapper.readValue(source, classOf[JMap[String, Object]])) - .docAsUpsert(upsert) - .build(), - classOf[JMap[String, Object]] - ) - ).flatMap { response => - if (response.shards().failed().intValue() == 0) { - Future.successful(true) - } else { - Future.failed(new Exception(s"Failed to update document with id: $id in index: $index")) - } - } - } -} - -trait ElasticsearchClientDeleteApi extends DeleteApi with ElasticsearchClientCompanion { - _: ElasticsearchClientRefreshApi => - - override def delete(uuid: String, index: String): Boolean = { - tryOrElse( - apply() - .delete( - new DeleteRequest.Builder().index(index).id(uuid).build() - ) - .shards() - .failed() - .intValue() == 0, - false - )(logger) - } - - override def deleteAsync(uuid: String, index: String)(implicit - ec: ExecutionContext - ): Future[Boolean] = { - fromCompletableFuture( - async() - .delete( - new DeleteRequest.Builder().index(index).id(uuid).build() - ) - ).flatMap { response => - if (response.shards().failed().intValue() == 0) { - Future.successful(true) - } else { - Future.failed(new Exception(s"Failed to delete document with id: $uuid in index: $index")) - } - } - } - -} - -trait ElasticsearchClientGetApi extends GetApi with ElasticsearchClientCompanion { - - def get[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], formats: Formats): Option[U] = { - Try( - apply().get( - new GetRequest.Builder() - .index( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ) - ) - .id(id) - .build(), - classOf[JMap[String, Object]] - ) - ) match { - case Success(response) => - if (response.found()) { - val source = mapper.writeValueAsString(response.source()) - logger.debug(s"Deserializing response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}") - // Deserialize the source string to the expected type - // Note: This assumes that the source is a valid JSON representation of U - // and that the serialization library is capable of handling it. - Try(serialization.read[U](source)) match { - case Success(value) => Some(value) - case Failure(f) => - logger.error( - s"Failed to deserialize response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) - None - } - } else { - None - } - case Failure(f) => - logger.error( - s"Failed to get document with id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) - None - } - } - - override def getAsync[U <: Timestamped]( - id: String, - index: Option[String] = None, - maybeType: Option[String] = None - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[Option[U]] = { - fromCompletableFuture( - async() - .get( - new GetRequest.Builder() - .index( - index.getOrElse( - maybeType.getOrElse( - m.runtimeClass.getSimpleName.toLowerCase - ) - ) - ) - .id(id) - .build(), - classOf[JMap[String, Object]] - ) - ).flatMap { - case response if response.found() => - val source = mapper.writeValueAsString(response.source()) - logger.debug(s"Deserializing response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}") - // Deserialize the source string to the expected type - // Note: This assumes that the source is a valid JSON representation of U - // and that the serialization library is capable of handling it. 
- Try(serialization.read[U](source)) match { - case Success(value) => Future.successful(Some(value)) - case Failure(f) => - logger.error( - s"Failed to deserialize response $source for id: $id, index: ${index - .getOrElse("default")}, type: ${maybeType.getOrElse("_all")}", - f - ) - Future.successful(None) - } - case _ => Future.successful(None) - } - Future { - this.get[U](id, index, maybeType) - } - } -} - -trait ElasticsearchClientSearchApi extends SearchApi with ElasticsearchClientCompanion { - override implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = - implicitly[ElasticSearchRequest](sqlSearch).query - - override def search[U]( - jsonQuery: JSONQuery - )(implicit m: Manifest[U], formats: Formats): List[U] = { - import jsonQuery._ - logger.info(s"Searching with query: $query on indices: ${indices.mkString(", ")}") - val response = apply().search( - new SearchRequest.Builder() - .index(indices.asJava) - .withJson( - new StringReader(query) - ) - .build(), - classOf[JMap[String, Object]] - ) - if (response.hits().total().value() > 0) { - response - .hits() - .hits() - .asScala - .flatMap { hit => - val source = mapper.writeValueAsString(hit.source()) - logger.debug(s"Deserializing hit: $source") - Try(serialization.read[U](source)).toOption.orElse { - logger.error( - s"Failed to deserialize hit: $source" - ) - None - } - } - .toList - } else { - List.empty[U] - } - } - - override def searchAsync[U]( - sqlQuery: SQLQuery - )(implicit m: Manifest[U], ec: ExecutionContext, formats: Formats): Future[List[U]] = { - val jsonQuery: JSONQuery = sqlQuery - import jsonQuery._ - fromCompletableFuture( - async() - .search( - new SearchRequest.Builder() - .index(indices.asJava) - .withJson(new StringReader(query)) - .build(), - classOf[JMap[String, Object]] - ) - ).flatMap { - case response if response.hits().total().value() > 0 => - Future.successful( - response - .hits() - .hits() - .asScala - .map { hit => - val source = mapper.writeValueAsString(hit.source()) - logger.debug(s"Deserializing hit: $source") - serialization.read[U](source) - } - .toList - ) - case _ => - logger.warn( - s"No hits found for query: ${sqlQuery.query} on indices: ${indices.mkString(", ")}" - ) - Future.successful(List.empty[U]) - } - } - - override def searchWithInnerHits[U, I](jsonQuery: JSONQuery, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[(U, List[I])] = { - import jsonQuery._ - logger.info(s"Searching with query: $query on indices: ${indices.mkString(", ")}") - val response = apply() - .search( - new SearchRequest.Builder() - .index(indices.asJava) - .withJson( - new StringReader(query) - ) - .build(), - classOf[JMap[String, Object]] - ) - val results = response - .hits() - .hits() - .asScala - .toList - if (results.nonEmpty) { - results.flatMap { hit => - val hitSource = hit.source() - Option(hitSource) - .map(mapper.writeValueAsString) - .flatMap { source => - logger.debug(s"Deserializing hit: $source") - Try(serialization.read[U](source)) match { - case Success(mainObject) => - Some(mainObject) - case Failure(f) => - logger.error( - s"Failed to deserialize hit: $source for query: $query on indices: ${indices.mkString(", ")}", - f - ) - None - } - } - .map { mainObject => - val innerHits = hit - .innerHits() - .asScala - .get(innerField) - .map(_.hits().hits().asScala.toList) - .getOrElse(Nil) - val innerObjects = innerHits.flatMap { innerHit => - val mapper = new JacksonJsonpMapper() - val writer = new StringWriter() - val 
generator = mapper.jsonProvider().createGenerator(writer) - mapper.serialize(innerHit, generator) - generator.close() - val innerSource = writer.toString - logger.debug(s"Processing inner hit: $innerSource") - val json = new JsonParser().parse(innerSource).getAsJsonObject - val gson = new Gson() - Try(serialization.read[I](gson.toJson(json.get("_source")))) match { - case Success(innerObject) => Some(innerObject) - case Failure(f) => - logger.error(s"Failed to deserialize inner hit: $innerSource", f) - None - } - } - (mainObject, innerObjects) - } - } - } else { - logger.warn(s"No hits found for query: $query on indices: ${indices.mkString(", ")}") - List.empty[(U, List[I])] - } - } - - override def multiSearch[U]( - jsonQueries: JSONQueries - )(implicit m: Manifest[U], formats: Formats): List[List[U]] = { - import jsonQueries._ - - val items = queries.map { query => - new RequestItem.Builder() - .header(new MultisearchHeader.Builder().index(query.indices.asJava).build()) - .body(new SearchRequestBody.Builder().withJson(new StringReader(query.query)).build()) - .build() - } - - val request = new MsearchRequest.Builder().searches(items.asJava).build() - val responses = apply().msearch(request, classOf[JMap[String, Object]]) - - responses.responses().asScala.toList.map { - case response if response.isFailure => - logger.error(s"Error in multi search: ${response.failure().error().reason()}") - List.empty[U] - - case response => - response - .result() - .hits() - .hits() - .asScala - .toList - .map(hit => serialization.read[U](mapper.writeValueAsString(hit.source()))) - } - } - - override def multiSearchWithInnerHits[U, I](jsonQueries: JSONQueries, innerField: String)(implicit - m1: Manifest[U], - m2: Manifest[I], - formats: Formats - ): List[List[(U, List[I])]] = { - import jsonQueries._ - val items = queries.map { query => - new RequestItem.Builder() - .header(new MultisearchHeader.Builder().index(query.indices.asJava).build()) - .body(new SearchRequestBody.Builder().withJson(new StringReader(query.query)).build()) - .build() - } - - val request = new MsearchRequest.Builder().searches(items.asJava).build() - val responses = apply().msearch(request, classOf[JMap[String, Object]]) - - responses.responses().asScala.toList.map { - case response if response.isFailure => - logger.error(s"Error in multi search: ${response.failure().error().reason()}") - List.empty[(U, List[I])] - - case response => - Try( - new JsonParser().parse(response.result().toString).getAsJsonObject ~> [U, I] innerField - ) match { - case Success(s) => s - case Failure(f) => - logger.error(f.getMessage, f) - List.empty - } - } - } - -} - -trait ElasticsearchClientBulkApi - extends ElasticsearchClientRefreshApi - with ElasticsearchClientSettingsApi - with ElasticsearchClientIndicesApi - with BulkApi { - override type A = BulkOperation - override type R = BulkResponse - - override def toBulkAction(bulkItem: BulkItem): A = { - import bulkItem._ - - action match { - case BulkAction.UPDATE => - new BulkOperation.Builder() - .update( - new UpdateOperation.Builder() - .index(index) - .id(id.orNull) - .action( - new UpdateAction.Builder[JMap[String, Object], JMap[String, Object]]() - .doc(mapper.readValue(body, classOf[JMap[String, Object]])) - .docAsUpsert(true) - .build() - ) - .build() - ) - .build() - - case BulkAction.DELETE => - val deleteId = id.getOrElse { - throw new IllegalArgumentException(s"Missing id for delete on index $index") - } - new BulkOperation.Builder() - .delete(new 
DeleteOperation.Builder().index(index).id(deleteId).build()) - .build() - - case _ => - new BulkOperation.Builder() - .index( - new IndexOperation.Builder[JMap[String, Object]]() - .index(index) - .id(id.orNull) - .document(mapper.readValue(body, classOf[JMap[String, Object]])) - .build() - ) - .build() - } - } - override def bulkResult: Flow[R, Set[String], NotUsed] = - Flow[BulkResponse] - .named("result") - .map(result => { - val items = result.items().asScala.toList - val grouped = items.groupBy(_.index()) - val indices = grouped.keys.toSet - for (index <- indices) { - logger - .info(s"Bulk operation succeeded for index $index with ${grouped(index).length} items.") - } - indices - }) - - override def bulk(implicit - bulkOptions: BulkOptions, - system: ActorSystem - ): Flow[Seq[A], R, NotUsed] = { - val parallelism = Math.max(1, bulkOptions.balance) - Flow[Seq[A]] - .named("bulk") - .mapAsyncUnordered[R](parallelism) { items => - val request = - new BulkRequest.Builder().index(bulkOptions.index).operations(items.asJava).build() - Try(apply().bulk(request)) match { - case Success(response) if response.errors() => - val failedItems = response.items().asScala.filter(_.status() >= 400) - if (failedItems.nonEmpty) { - val errorMessages = - failedItems.map(i => s"${i.id()} - ${i.error().reason()}").mkString(", ") - Future.failed(new Exception(s"Bulk operation failed for items: $errorMessages")) - } else { - Future.successful(response) - } - case Success(response) => - Future.successful(response) - case Failure(exception) => - logger.error("Bulk operation failed", exception) - Future.failed(exception) - } - } - } - - private[this] def toBulkElasticResultItem(i: BulkResponseItem): BulkElasticResultItem = - new BulkElasticResultItem { - override def index: String = i.index() - } - - override implicit def toBulkElasticAction(a: BulkOperation): BulkElasticAction = - new BulkElasticAction { - override def index: String = { - a match { - case op if op.isIndex => op.index().index() - case op if op.isDelete => op.delete().index() - case op if op.isUpdate => op.update().index() - case _ => - throw new IllegalArgumentException(s"Unsupported bulk operation type: ${a.getClass}") - } - } - } - - override implicit def toBulkElasticResult(r: BulkResponse): BulkElasticResult = { - new BulkElasticResult { - override def items: List[BulkElasticResultItem] = - r.items().asScala.toList.map(toBulkElasticResultItem) - } - } -} diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala new file mode 100644 index 00000000..eede5b68 --- /dev/null +++ b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientApi.scala @@ -0,0 +1,1467 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client.java + +import akka.NotUsed +import akka.actor.ActorSystem +import akka.stream.scaladsl +import akka.stream.scaladsl.{Flow, Source} +import app.softnetwork.elastic.client._ +import app.softnetwork.elastic.client.bulk._ +import app.softnetwork.elastic.client.scroll._ +import app.softnetwork.elastic.sql.bridge._ +import app.softnetwork.elastic.sql.query.{SQLAggregation, SQLSearchRequest} +import app.softnetwork.elastic.client +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticResult, ElasticSuccess} +import co.elastic.clients.elasticsearch._types.{FieldSort, FieldValue, SortOptions, SortOrder, Time} +import co.elastic.clients.elasticsearch.core.bulk.{ + BulkOperation, + DeleteOperation, + IndexOperation, + UpdateAction, + UpdateOperation +} +import co.elastic.clients.elasticsearch.core.msearch.{MultisearchHeader, RequestItem} +import co.elastic.clients.elasticsearch.core._ +import co.elastic.clients.elasticsearch.core.reindex.{Destination, Source => ESSource} +import co.elastic.clients.elasticsearch.core.search.{PointInTimeReference, SearchRequestBody} +import co.elastic.clients.elasticsearch.indices.update_aliases.{Action, AddAction, RemoveAction} +import co.elastic.clients.elasticsearch.indices.{ExistsRequest => IndexExistsRequest, _} +import com.google.gson.JsonParser + +import _root_.java.io.{IOException, StringReader} +import _root_.java.util.{Map => JMap} +import scala.jdk.CollectionConverters._ + +import scala.concurrent.{ExecutionContext, Future} +import scala.language.implicitConversions +import scala.util.{Failure, Success, Try} + +trait JavaClientApi + extends ElasticClientApi + with JavaClientIndicesApi + with JavaClientAliasApi + with JavaClientSettingsApi + with JavaClientMappingApi + with JavaClientRefreshApi + with JavaClientFlushApi + with JavaClientCountApi + with JavaClientIndexApi + with JavaClientUpdateApi + with JavaClientDeleteApi + with JavaClientGetApi + with JavaClientSearchApi + with JavaClientBulkApi + with JavaClientScrollApi + with JavaClientCompanion + with JavaClientVersionApi + +/** Elasticsearch client implementation using the Java Client + * @see + * [[VersionApi]] for version information + */ +trait JavaClientVersionApi extends VersionApi with JavaClientHelpers { + _: SerializationApi with JavaClientCompanion => + override private[client] def executeVersion(): result.ElasticResult[String] = + executeJavaAction( + operation = "version", + index = None, + retryable = true + )( + apply().info() + ) { response => + response.version().number() + } +} + +/** Elasticsearch client implementation of Indices API using the Java Client + * @see + * [[IndicesApi]] for index management operations + */ +trait JavaClientIndicesApi extends IndicesApi with RefreshApi with JavaClientHelpers { + _: JavaClientCompanion => + override private[client] def executeCreateIndex( + index: String, + settings: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "createIndex", + index = Some(index), + retryable = false + )( + apply() + .indices() + .create( + new CreateIndexRequest.Builder() + .index(index) + .settings(new IndexSettings.Builder().withJson(new StringReader(settings)).build()) + .build() + ) + )(_.acknowledged()) + + override private[client] def executeDeleteIndex(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "deleteIndex", + index = Some(index), + retryable = false + )( + apply() + .indices() + .delete(new 
DeleteIndexRequest.Builder().index(index).build()) + )(_.acknowledged()) + + override private[client] def executeCloseIndex(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "closeIndex", + index = Some(index), + retryable = false + )( + apply() + .indices() + .close(new CloseIndexRequest.Builder().index(index).build()) + )(_.acknowledged()) + + override private[client] def executeOpenIndex(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "openIndex", + index = Some(index), + retryable = false + )( + apply() + .indices() + .open(new OpenRequest.Builder().index(index).build()) + )(_.acknowledged()) + + override private[client] def executeReindex( + sourceIndex: String, + targetIndex: String, + refresh: Boolean + ): result.ElasticResult[(Boolean, Option[Long])] = + executeJavaAction( + operation = "reindex", + index = Some(s"$sourceIndex -> $targetIndex"), + retryable = false + )( + apply() + .reindex( + new ReindexRequest.Builder() + .source(new ESSource.Builder().index(sourceIndex).build()) + .dest(new Destination.Builder().index(targetIndex).build()) + .refresh(refresh) + .build() + ) + ) { response => + val failures = response.failures().asScala.map(_.cause().reason()) + if (failures.nonEmpty) { + logger.error( + s"Reindexing from $sourceIndex to $targetIndex failed with errors: ${failures.take(10).mkString(", ")}" + ) + } + (failures.isEmpty, Option(response.total())) + } + + override private[client] def executeIndexExists(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "indexExists", + index = Some(index), + retryable = false + )( + apply() + .indices() + .exists( + new IndexExistsRequest.Builder().index(index).build() + ) + )(_.value()) + +} + +/** Elasticsearch client implementation of Alias API using the Java Client + * @see + * [[AliasApi]] for alias management operations + */ +trait JavaClientAliasApi extends AliasApi with JavaClientHelpers { + _: IndicesApi with JavaClientCompanion => + + override private[client] def executeAddAlias( + index: String, + alias: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "addAlias", + index = Some(index), + retryable = false + )( + apply() + .indices() + .updateAliases( + new UpdateAliasesRequest.Builder() + .actions( + new Action.Builder() + .add(new AddAction.Builder().index(index).alias(alias).build()) + .build() + ) + .build() + ) + )(_.acknowledged()) + + override private[client] def executeRemoveAlias( + index: String, + alias: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "removeAlias", + index = Some(index), + retryable = false + )( + apply() + .indices() + .updateAliases( + new UpdateAliasesRequest.Builder() + .actions( + new Action.Builder() + .remove(new RemoveAction.Builder().index(index).alias(alias).build()) + .build() + ) + .build() + ) + )(_.acknowledged()) + + override private[client] def executeAliasExists(alias: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "aliasExists", + index = None, + retryable = false + )( + apply() + .indices() + .existsAlias( + new ExistsAliasRequest.Builder().name(alias).build() + ) + )(_.value()) + + override private[client] def executeGetAliases(index: String): result.ElasticResult[String] = + executeJavaAction( + operation = "getAliases", + index = Some(index), + retryable = false + )( + apply() + .indices() + .getAlias( + new 
GetAliasRequest.Builder().index(index).build() + ) + )(response => convertToJson(response)) + + override private[client] def executeSwapAlias( + oldIndex: String, + newIndex: String, + alias: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "swapAlias", + index = Some(s"$oldIndex <-> $newIndex"), + retryable = false + )( + apply() + .indices() + .updateAliases( + new UpdateAliasesRequest.Builder() + .actions( + List( + new Action.Builder() + .remove(new RemoveAction.Builder().index(oldIndex).alias(alias).build()) + .build(), + new Action.Builder() + .add(new AddAction.Builder().index(newIndex).alias(alias).build()) + .build() + ).asJava + ) + .build() + ) + )(_.acknowledged()) + +} + +/** Elasticsearch client implementation of Settings API using the Java Client + * @see + * [[SettingsApi]] for settings management operations + */ +trait JavaClientSettingsApi extends SettingsApi with JavaClientHelpers { + _: IndicesApi with JavaClientCompanion => + + override private[client] def executeUpdateSettings( + index: String, + settings: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "updateSettings", + index = Some(index), + retryable = false + )( + apply() + .indices() + .putSettings( + new PutIndicesSettingsRequest.Builder() + .index(index) + .settings(new IndexSettings.Builder().withJson(new StringReader(settings)).build()) + .build() + ) + )(_.acknowledged()) + + override private[client] def executeLoadSettings(index: String): result.ElasticResult[String] = + executeJavaAction( + operation = "loadSettings", + index = Some(index), + retryable = true + )( + apply() + .indices() + .getSettings( + new GetIndicesSettingsRequest.Builder().index(index).build() + ) + )(response => convertToJson(response)) + +} + +/** Elasticsearch client implementation of Mapping API using the Java Client + * @see + * [[MappingApi]] for mapping management operations + */ +trait JavaClientMappingApi extends MappingApi with JavaClientHelpers { + _: SettingsApi with IndicesApi with RefreshApi with JavaClientCompanion => + + override private[client] def executeSetMapping( + index: String, + mapping: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "setMapping", + index = Some(index), + retryable = false + )( + apply() + .indices() + .putMapping( + new PutMappingRequest.Builder().index(index).withJson(new StringReader(mapping)).build() + ) + )(_.acknowledged()) + + override private[client] def executeGetMapping(index: String): result.ElasticResult[String] = + executeJavaAction( + operation = "getMapping", + index = Some(index), + retryable = true + )( + apply() + .indices() + .getMapping( + new GetMappingRequest.Builder().index(index).build() + ) + ) { response => + val valueOpt = response.mappings().asScala.get(index) + valueOpt match { + case Some(value) => convertToJson(value) + case None => """{"properties": {}}""" + } + } + + /** Get the mapping properties of an index.
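+ * A minimal usage sketch (the index name "my-index" is illustrative, not part of this API):
+ * {{{
+ * getMappingProperties("my-index") match {
+ *   case ElasticSuccess(properties) => logger.info(s"mapping properties: $properties")
+ *   case ElasticFailure(error)      => logger.error(error.message)
+ * }
+ * }}}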
 + * + * @param index + * - the name of the index to get the mapping properties for + * @return + * the mapping properties of the index as a JSON string + */ + override def getMappingProperties(index: String): ElasticResult[String] = { + getMapping(index).flatMap { jsonString => + // ✅ Extracting mapping from JSON + ElasticResult.attempt( + new JsonParser().parse(jsonString).getAsJsonObject + ) match { + case ElasticFailure(error) => + logger.error(s"❌ Failed to parse JSON mapping for index '$index': ${error.message}") + ElasticFailure(error.copy(operation = Some("getMapping"), index = Some(index))) + case ElasticSuccess(indexObj) => + val mappingsObj = indexObj + .getAsJsonObject("mappings") + ElasticSuccess(mappingsObj.toString) + } + } + } + +} + +/** Elasticsearch client implementation of Refresh API using the Java Client + * @see + * [[RefreshApi]] for index refresh operations + */ +trait JavaClientRefreshApi extends RefreshApi with JavaClientHelpers { + _: JavaClientCompanion => + + override private[client] def executeRefresh(index: String): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "refresh", + index = Some(index), + retryable = false + )( + apply() + .indices() + .refresh( + new RefreshRequest.Builder().index(index).build() + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + +} + +/** Elasticsearch client implementation of Flush API using the Java Client + * @see + * [[FlushApi]] for index flush operations + */ +trait JavaClientFlushApi extends FlushApi with JavaClientHelpers { + _: JavaClientCompanion => + + override private[client] def executeFlush( + index: String, + force: Boolean, + wait: Boolean + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "flush", + index = Some(index), + retryable = false + )( + apply() + .indices() + .flush( + new FlushRequest.Builder().index(index).force(force).waitIfOngoing(wait).build() + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + +} + +/** Elasticsearch client implementation of Count API using the Java Client + * @see + * [[CountApi]] for count operations + */ +trait JavaClientCountApi extends CountApi with JavaClientHelpers { + _: JavaClientCompanion => + + override private[client] def executeCount( + query: ElasticQuery + ): result.ElasticResult[Option[Double]] = + executeJavaAction( + operation = "count", + index = Some(query.indices.mkString(",")), + retryable = true + )( + apply() + .count( + new CountRequest.Builder().index(query.indices.asJava).build() + ) + ) { response => + Option(response.count().toDouble) + } + + override private[client] def executeCountAsync( + query: ElasticQuery + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[Double]]] = + fromCompletableFuture( + async() + .count( + new CountRequest.Builder().index(query.indices.asJava).build() + ) + ).map { response => + result.ElasticSuccess(Option(response.count().toDouble)) + } + +} + +/** Elasticsearch client implementation of Index API using the Java Client + * @see + * [[IndexApi]] for index operations + */ +trait JavaClientIndexApi extends IndexApi with JavaClientHelpers { + _: RefreshApi with JavaClientCompanion with SerializationApi => + + override private[client] def executeIndex( + index: String, + id: String, + source: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "index", + index = Some(index), + retryable = false + )( + apply() + .index( + new IndexRequest.Builder() + .index(index) + .id(id) + .withJson(new
StringReader(source)) + .build() + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + + override private[client] def executeIndexAsync(index: String, id: String, source: String)(implicit + ec: ExecutionContext + ): Future[result.ElasticResult[Boolean]] = + fromCompletableFuture( + async() + .index( + new IndexRequest.Builder() + .index(index) + .id(id) + .withJson(new StringReader(source)) + .build() + ) + ).map { response => + if (response.shards().failed().intValue() == 0) { + result.ElasticSuccess(true) + } else { + result.ElasticFailure( + client.result.ElasticError(s"Failed to index document with id: $id in index: $index") + ) + } + } + +} + +/** Elasticsearch client implementation of Update API using the Java Client + * @see + * [[UpdateApi]] for update operations + */ +trait JavaClientUpdateApi extends UpdateApi with JavaClientHelpers { + _: RefreshApi with JavaClientCompanion with SerializationApi => + + override private[client] def executeUpdate( + index: String, + id: String, + source: String, + upsert: Boolean + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "update", + index = Some(index), + retryable = false + )( + apply() + .update( + new UpdateRequest.Builder[JMap[String, Object], JMap[String, Object]]() + .index(index) + .id(id) + .doc(mapper.readValue(source, classOf[JMap[String, Object]])) + .docAsUpsert(upsert) + .build(), + classOf[JMap[String, Object]] + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + + override private[client] def executeUpdateAsync( + index: String, + id: String, + source: String, + upsert: Boolean + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Boolean]] = + fromCompletableFuture( + async() + .update( + new UpdateRequest.Builder[JMap[String, Object], JMap[String, Object]]() + .index(index) + .id(id) + .doc(mapper.readValue(source, classOf[JMap[String, Object]])) + .docAsUpsert(upsert) + .build(), + classOf[JMap[String, Object]] + ) + ).map { response => + if (response.shards().failed().intValue() == 0) { + result.ElasticSuccess(true) + } else { + result.ElasticFailure( + client.result.ElasticError(s"Failed to update document with id: $id in index: $index") + ) + } + } + +} + +/** Elasticsearch client implementation of Delete API using the Java Client + * @see + * [[DeleteApi]] for delete operations + */ +trait JavaClientDeleteApi extends DeleteApi with JavaClientHelpers { + _: RefreshApi with JavaClientCompanion => + + override private[client] def executeDelete( + index: String, + id: String + ): result.ElasticResult[Boolean] = + executeJavaBooleanAction( + operation = "delete", + index = Some(index), + retryable = false + )( + apply() + .delete( + new DeleteRequest.Builder().index(index).id(id).build() + ) + )( + _.shards() + .failed() + .intValue() == 0 + ) + + override private[client] def executeDeleteAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[result.ElasticResult[Boolean]] = + fromCompletableFuture( + async() + .delete( + new DeleteRequest.Builder().index(index).id(id).build() + ) + ).map { response => + if (response.shards().failed().intValue() == 0) { + result.ElasticSuccess(true) + } else { + result.ElasticFailure( + client.result.ElasticError(s"Failed to delete document with id: $id in index: $index") + ) + } + } + +} + +/** Elasticsearch client implementation of Get API using the Java Client + * @see + * [[GetApi]] for get operations + */ +trait JavaClientGetApi extends GetApi with JavaClientHelpers { + _: JavaClientCompanion with SerializationApi 
=> + + override private[client] def executeGet( + index: String, + id: String + ): result.ElasticResult[Option[String]] = + executeJavaAction( + operation = "get", + index = Some(index), + retryable = true + )( + apply() + .get( + new GetRequest.Builder() + .index(index) + .id(id) + .build(), + classOf[JMap[String, Object]] + ) + ) { response => + if (response.found()) { + Some(mapper.writeValueAsString(response.source())) + } else { + None + } + } + + override private[client] def executeGetAsync(index: String, id: String)(implicit + ec: ExecutionContext + ): Future[result.ElasticResult[Option[String]]] = + fromCompletableFuture( + async() + .get( + new GetRequest.Builder() + .index(index) + .id(id) + .build(), + classOf[JMap[String, Object]] + ) + ).map { response => + if (response.found()) { + result.ElasticSuccess(Some(mapper.writeValueAsString(response.source()))) + } else { + result.ElasticSuccess(None) + } + } + +} + +/** Elasticsearch client implementation of Search API using the Java Client + * @see + * [[SearchApi]] for search operations + */ +trait JavaClientSearchApi extends SearchApi with JavaClientHelpers { + _: JavaClientCompanion with SerializationApi => + + override implicit def sqlSearchRequestToJsonQuery(sqlSearch: SQLSearchRequest): String = + implicitly[ElasticSearchRequest](sqlSearch).query + + override private[client] def executeSingleSearch( + elasticQuery: ElasticQuery + ): result.ElasticResult[Option[String]] = + executeJavaAction( + operation = "singleSearch", + index = Some(elasticQuery.indices.mkString(",")), + retryable = true + )( + apply() + .search( + new SearchRequest.Builder() + .index(elasticQuery.indices.asJava) + .withJson( + new StringReader(elasticQuery.query) + ) + .build(), + classOf[JMap[String, Object]] + ) + )(resp => Some(convertToJson(resp))) + + override private[client] def executeMultiSearch( + elasticQueries: ElasticQueries + ): result.ElasticResult[Option[String]] = + executeJavaAction( + operation = "multiSearch", + index = Some(elasticQueries.queries.flatMap(_.indices).distinct.mkString(",")), + retryable = true + ) { + val items = elasticQueries.queries.map { q => + new RequestItem.Builder() + .header(new MultisearchHeader.Builder().index(q.indices.asJava).build()) + .body(new SearchRequestBody.Builder().withJson(new StringReader(q.query)).build()) + .build() + } + + val request = new MsearchRequest.Builder().searches(items.asJava).build() + apply().msearch(request, classOf[JMap[String, Object]]) + }(resp => Some(convertToJson(resp))) + + override private[client] def executeSingleSearchAsync( + elasticQuery: ElasticQuery + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[String]]] = + fromCompletableFuture( + async() + .search( + new SearchRequest.Builder() + .index(elasticQuery.indices.asJava) + .withJson(new StringReader(elasticQuery.query)) + .build(), + classOf[JMap[String, Object]] + ) + ).map { response => + result.ElasticSuccess(Some(convertToJson(response))) + } + + override private[client] def executeMultiSearchAsync( + elasticQueries: ElasticQueries + )(implicit ec: ExecutionContext): Future[result.ElasticResult[Option[String]]] = + fromCompletableFuture { + val items = elasticQueries.queries.map { q => + new RequestItem.Builder() + .header(new MultisearchHeader.Builder().index(q.indices.asJava).build()) + .body(new SearchRequestBody.Builder().withJson(new StringReader(q.query)).build()) + .build() + } + + val request = new MsearchRequest.Builder().searches(items.asJava).build() + async().msearch(request, 
classOf[JMap[String, Object]]) + } + .map { response => + result.ElasticSuccess(Some(convertToJson(response))) + } + +} + +/** Elasticsearch client implementation of Bulk API using the Java Client + * @see + * [[BulkApi]] for bulk operations + */ +trait JavaClientBulkApi extends BulkApi with JavaClientHelpers { + _: RefreshApi with SettingsApi with IndexApi with JavaClientCompanion => + override type BulkActionType = BulkOperation + override type BulkResultType = BulkResponse + + override implicit private[client] def toBulkElasticAction(a: BulkOperation): BulkElasticAction = + new BulkElasticAction { + override def index: String = { + a match { + case op if op.isIndex => op.index().index() + case op if op.isDelete => op.delete().index() + case op if op.isUpdate => op.update().index() + case _ => + throw new IllegalArgumentException(s"Unsupported bulk operation type: ${a.getClass}") + } + } + } + + /** Basic flow for executing a bulk action. This method must be implemented by concrete classes + * depending on the Elasticsearch version and client used. + * + * @param bulkOptions + * configuration options + * @return + * Flow transforming bulk actions into results + */ + override private[client] def bulkFlow(implicit + bulkOptions: BulkOptions, + system: ActorSystem + ): Flow[Seq[A], R, NotUsed] = { + val parallelism = Math.max(1, bulkOptions.balance) + Flow[Seq[A]] + .named("bulk") + .mapAsyncUnordered[R](parallelism) { items => + val request = + new BulkRequest.Builder().index(bulkOptions.defaultIndex).operations(items.asJava).build() + Try(apply().bulk(request)) match { + case Success(response) => + if (response.errors()) { + val failedItems = response.items().asScala.filter(_.status() >= 400) + if (failedItems.nonEmpty) { + val errorMessages = + failedItems + .take(10) + .map(i => s"(${i.index()}, ${i.id()}) -> ${i.error().reason()}") + .mkString(", ") + logger.error(s"Bulk operation failed for items: $errorMessages") + } else { + logger.warn("Bulk operation reported errors but no failed items found") + } + } + Future.successful(response) + case Failure(exception) => + logger.error(s"Bulk operation failed : ${exception.getMessage}") + Future.failed(exception) + } + } + } + + /** Convert a BulkResultType into individual results. This method must extract the successes and + * failures from the ES response. 
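+ * A sketch of consuming the extracted results; `bulkResponse` and `batch` stand for a
+ * previously obtained [[BulkResponse]] and its originating `Seq[BulkItem]`:
+ * {{{
+ * val outcomes = extractBulkResults(bulkResponse, batch)
+ * val (failures, successes) = outcomes.partition(_.isLeft)
+ * val retryable = failures.collect { case Left(doc) if doc.retryable => doc }
+ * }}}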
+ * + * @param result + * raw result from the bulk + * @return + * sequence of Right(id) for success or Left(failed) for failure + */ + override private[client] def extractBulkResults( + result: BulkResponse, + originalBatch: Seq[BulkItem] + ): Seq[Either[FailedDocument, SuccessfulDocument]] = { + // no results at all + if ( + originalBatch.nonEmpty && + (result == null || (result.items() == null || result.items().isEmpty)) + ) { + logger.error("Bulk result is null or has no items") + return originalBatch.map { item => + Left( + FailedDocument( + id = item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = BulkError( + message = "Null bulk result", + `type` = "internal_error", + status = 500 + ), + retryable = false + ) + ) + } + } + + // process failed items + val failedItems = + result + .items() + .asScala + .filter(item => Option(item.error()).isDefined) + .map { item => + val errorStatus = item.status() + val errorType = item.error().`type` + val errorReason = item.error().reason() + + val originalItemOpt = originalBatch.find { originalItem => + originalItem.index == item.index() && originalItem.id.contains(item.id()) + } + + // Determine if the error is retryable + val isRetryable = + originalItemOpt.isDefined && (BulkErrorAnalyzer.isRetryable(errorStatus) || + BulkErrorAnalyzer.isRetryableByType(errorType)) + + val document = originalItemOpt.map(_.document).getOrElse("") + Left( + FailedDocument( + id = item.id(), + index = item.index(), + document = document, + error = BulkError( + message = errorReason, + `type` = errorType, + status = errorStatus + ), + retryable = isRetryable + ) + ) + } + .toSeq + + // process successful items + val successfulItems = result + .items() + .asScala + .filter(item => Option(item.error()).isEmpty) + .map { item => + Right( + SuccessfulDocument( + id = item.id(), + index = item.index() + ) + ) + } + .toSeq + + val results = failedItems ++ successfulItems + + // if no individual results but overall failure, mark all as failed + if (results.isEmpty && originalBatch.nonEmpty) { + logger.error("Bulk operation failed with no individual item results") + return originalBatch.map { item => + Left( + FailedDocument( + id = item.id.getOrElse("unknown"), + index = item.index, + document = item.document, + error = BulkError( + message = "Bulk operation failed with no individual item results", + `type` = "internal_error", + status = 500 + ), + retryable = false + ) + ) + } + } + + results + } + + override private[client] def toBulkAction(bulkItem: BulkItem): A = { + import bulkItem._ + + action match { + case BulkAction.UPDATE => + new BulkOperation.Builder() + .update( + new UpdateOperation.Builder() + .index(bulkItem.index) + .id(id.orNull) + .action( + new UpdateAction.Builder[JMap[String, Object], JMap[String, Object]]() + .doc(mapper.readValue(document, classOf[JMap[String, Object]])) + .docAsUpsert(true) + .build() + ) + .build() + ) + .build() + + case BulkAction.DELETE => + val deleteId = id.getOrElse { + throw new IllegalArgumentException(s"Missing id for delete on index ${bulkItem.index}") + } + new BulkOperation.Builder() + .delete(new DeleteOperation.Builder().index(bulkItem.index).id(deleteId).build()) + .build() + + case _ => + new BulkOperation.Builder() + .index( + new IndexOperation.Builder[JMap[String, Object]]() + .index(bulkItem.index) + .id(id.orNull) + .document(mapper.readValue(document, classOf[JMap[String, Object]])) + .build() + ) + .build() + } + } + + /** Conversion BulkActionType -> BulkItem */ + 
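+ // A hedged round-trip sketch for the two conversions in this trait; the index name,
+ // id and document body are illustrative values, not fixtures from this project:
+ //   val item = BulkItem(
+ //     index = "my-index",
+ //     id = Some("1"),
+ //     document = """{"name": "john"}""",
+ //     action = BulkAction.INDEX,
+ //     parent = None
+ //   )
+ //   val op: BulkOperation = toBulkAction(item)
+ //   assert(actionToBulkItem(op).index == "my-index")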
override private[client] def actionToBulkItem(action: BulkActionType): BulkItem = + action match { + case op if op.isIndex => + BulkItem( + index = op.index().index(), + id = Option(op.index().id()), + document = mapper.writeValueAsString(op.index().document()), + action = BulkAction.INDEX, + parent = None + ) + case op if op.isDelete => + BulkItem( + index = op.delete().index(), + id = Some(op.delete().id()), + document = "", + action = BulkAction.DELETE, + parent = None + ) + case op if op.isUpdate => + BulkItem( + index = op.update().index(), + id = Some(op.update().id()), + document = mapper.writeValueAsString(op.update().action().doc()), + action = BulkAction.UPDATE, + parent = None + ) + case _ => + throw new IllegalArgumentException(s"Unsupported bulk operation type: ${action.getClass}") + } + +} + +/** Elasticsearch client implementation of Scroll API using the Java Client + * @see + * [[ScrollApi]] for scroll operations + */ +trait JavaClientScrollApi extends ScrollApi with JavaClientHelpers { + _: VersionApi with SearchApi with JavaClientCompanion => + + /** Classic scroll (works for both hits and aggregations) + */ + override private[client] def scrollClassic( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation], + config: ScrollConfig + )(implicit system: ActorSystem): scaladsl.Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + Source + .unfoldAsync[Option[String], Seq[Map[String, Any]]](None) { scrollIdOpt => + retryWithBackoff(config.retryConfig) { + Future { + scrollIdOpt match { + case None => + // Initial search with scroll + logger.info( + s"Starting classic scroll on indices: ${elasticQuery.indices.mkString(", ")}" + ) + + val searchRequest = new SearchRequest.Builder() + .index(elasticQuery.indices.asJava) + .withJson(new StringReader(elasticQuery.query)) + .scroll(Time.of(t => t.time(config.keepAlive))) + .size(config.scrollSize) + .build() + + val response = apply().search(searchRequest, classOf[JMap[String, Object]]) + + if ( + response.shards() != null && response + .shards() + .failed() != null && response.shards().failed().intValue() > 0 + ) { + val failures = response.shards().failures() + val errorMsg = if (failures != null && !failures.isEmpty) { + failures.asScala.map(_.reason()).mkString("; ") + } else { + "Unknown shard failure" + } + throw new IOException(s"Initial scroll failed: $errorMsg") + } + + val scrollId = response.scrollId() + + if (scrollId == null) { + throw new IllegalStateException("Scroll ID is null in response") + } + + val results = extractAllResults(Left(response), fieldAliases, aggregations) + + if (results.isEmpty || scrollId == null) None + else Some((Some(scrollId), results)) + + case Some(scrollId) => + // Subsequent scroll + logger.debug(s"Fetching next scroll batch (scrollId: $scrollId)") + + val scrollRequest = new ScrollRequest.Builder() + .scrollId(scrollId) + .scroll(Time.of(t => t.time(config.keepAlive))) + .build() + + val response = apply().scroll(scrollRequest, classOf[JMap[String, Object]]) + + if ( + response.shards() != null && response + .shards() + .failed() != null && response.shards().failed().intValue() > 0 + ) { + clearScroll(scrollId) + val failures = response.shards().failures() + val errorMsg = if (failures != null && !failures.isEmpty) { + failures.asScala.map(_.reason()).mkString("; ") + } else { + "Unknown shard failure" + } + throw new IOException(s"Scroll continuation failed: $errorMsg") + } + + val 
newScrollId = response.scrollId() + val results = extractAllResults(Right(response), fieldAliases, aggregations) + + if (results.isEmpty) { + clearScroll(scrollId) + None + } else { + Some((Some(newScrollId), results)) + } + } + } + }(system, logger).recover { case ex: Exception => + logger.error(s"Scroll failed after retries: ${ex.getMessage}", ex) + scrollIdOpt.foreach(clearScroll) + None + } + } + .mapConcat(identity) + } + + /** Search After (only for hits, more efficient) + */ + override private[client] def searchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): scaladsl.Source[Map[String, Any], NotUsed] = { + pitSearchAfter(elasticQuery, fieldAliases, config, hasSorts) + } + + /** PIT + search_after (recommended for ES 7.10+, required for ES 8+) + * + * Advantages: + * - More efficient than classic scroll (stateless) + * - Better for deep pagination + * - Can be parallelized + * - Lower memory footprint on ES cluster + * + * @note + * Only works for hits, not for aggregations (use scrollSourceClassic for aggregations) + */ + private[client] def pitSearchAfter( + elasticQuery: ElasticQuery, + fieldAliases: Map[String, String], + config: ScrollConfig, + hasSorts: Boolean = false + )(implicit system: ActorSystem): Source[Map[String, Any], NotUsed] = { + implicit val ec: ExecutionContext = system.dispatcher + + // Step 1: Open PIT + val pitIdFuture: Future[String] = openPit(elasticQuery.indices, config.keepAlive) + + Source + .futureSource { + pitIdFuture.map { pitId => + logger.info(s"Opened PIT: $pitId for indices: ${elasticQuery.indices.mkString(", ")}") + + Source + .unfoldAsync[Option[Seq[Any]], Seq[Map[String, Any]]](None) { searchAfterOpt => + retryWithBackoff(config.retryConfig) { + Future { + searchAfterOpt match { + case None => + logger.info(s"Starting PIT search_after (pitId: ${pitId.take(20)}...)") + case Some(values) => + logger.debug( + s"Fetching next PIT search_after batch (after: ${if (values.length > 3) + s"[${values.take(3).mkString(", ")}...]" + else values.mkString(", ")})" + ) + } + + // Build search request with PIT + val requestBuilder = new SearchRequest.Builder() + .size(config.scrollSize) + .pit( + PointInTimeReference + .of(p => p.id(pitId).keepAlive(Time.of(t => t.time(config.keepAlive)))) + ) + + // Parse query to add query clause (not indices, they're in PIT) + val queryJson = new JsonParser().parse(elasticQuery.query).getAsJsonObject + + // Extract query clause if present + if (queryJson.has("query")) { + requestBuilder.withJson(new StringReader(elasticQuery.query)) + } + + // Check if sorts already exist in the query + if (!hasSorts && !queryJson.has("sort")) { + logger.warn( + "No sort fields in query for PIT search_after, adding default _shard_doc sort. " + + "_shard_doc is more efficient than _id for PIT." 
+ ) + requestBuilder.sort( + SortOptions.of { sortBuilder => + sortBuilder.field( + FieldSort.of(fieldSortBuilder => + fieldSortBuilder.field("_shard_doc").order(SortOrder.Asc) + ) + ) + } + ) + } else if (hasSorts && queryJson.has("sort")) { + // Sorts already present, check that a tie-breaker exists + val existingSorts = queryJson.getAsJsonArray("sort") + val hasShardDocSort = existingSorts.asScala.exists { sortElem => + sortElem.isJsonObject && ( + sortElem.getAsJsonObject.has("_shard_doc") || + sortElem.getAsJsonObject.has("_id") + ) + } + if (!hasShardDocSort) { + // Add _shard_doc as tie-breaker + logger.debug("Adding _shard_doc as tie-breaker to existing sorts") + requestBuilder.sort( + SortOptions.of { sortBuilder => + sortBuilder.field( + FieldSort.of(fieldSortBuilder => + fieldSortBuilder.field("_shard_doc").order(SortOrder.Asc) + ) + ) + } + ) + } + } + + // Add search_after if available + searchAfterOpt.foreach { searchAfter => + val fieldValues: Seq[FieldValue] = searchAfter.map { + case s: String => FieldValue.of(s) + case i: Int => FieldValue.of(i.toLong) + case l: Long => FieldValue.of(l) + case d: Double => FieldValue.of(d) + case b: Boolean => FieldValue.of(b) + case other => FieldValue.of(other.toString) + } + requestBuilder.searchAfter(fieldValues.asJava) + } + + val response = apply().search( + requestBuilder.build(), + classOf[JMap[String, Object]] + ) + + // Check errors + if ( + response.shards() != null && + response.shards().failed() != null && + response.shards().failed().intValue() > 0 + ) { + val failures = response.shards().failures() + val errorMsg = if (failures != null && !failures.isEmpty) { + failures.asScala.map(_.reason()).mkString("; ") + } else { + "Unknown shard failure" + } + throw new IOException(s"PIT search_after failed: $errorMsg") + } + + val hits = extractHitsOnly(response, fieldAliases) + + if (hits.isEmpty) { + // Close PIT when done + closePit(pitId) + None + } else { + val lastHit = response.hits().hits().asScala.lastOption + val nextSearchAfter = lastHit.flatMap { hit => + val sortValues = hit.sort().asScala + if (sortValues.nonEmpty) { + Some(sortValues.map { fieldValue => + if (fieldValue.isString) fieldValue.stringValue() + else if (fieldValue.isDouble) fieldValue.doubleValue() + else if (fieldValue.isLong) fieldValue.longValue() + else if (fieldValue.isBoolean) fieldValue.booleanValue() + else if (fieldValue.isNull) null + else fieldValue.toString + }.toSeq) + } else { + None + } + } + + logger.debug(s"Retrieved ${hits.size} documents, continuing with PIT") + Some((nextSearchAfter, hits)) + } + } + }(system, logger).recover { case ex: Exception => + logger.error(s"PIT search_after failed after retries: ${ex.getMessage}", ex) + closePit(pitId) + None + } + } + .watchTermination() { (_, done) => + // Cleanup PIT on stream completion/failure + done.onComplete { + case scala.util.Success(_) => + logger.info( + s"PIT search_after completed successfully, closing PIT: ${pitId.take(20)}..." + ) + closePit(pitId) + case scala.util.Failure(ex) => + logger.error( + s"PIT search_after failed: ${ex.getMessage}, closing PIT: ${pitId.take(20)}..."
+ ) + closePit(pitId) + } + NotUsed + } + .mapConcat(identity) + } + } + .mapMaterializedValue(_ => NotUsed) + } + + /** Open a Point In Time + */ + private def openPit(indices: Seq[String], keepAlive: String)(implicit + ec: ExecutionContext + ): Future[String] = { + Future { + logger.debug(s"Opening PIT for indices: ${indices.mkString(", ")} with keepAlive: $keepAlive") + + val openPitRequest = new OpenPointInTimeRequest.Builder() + .index(indices.asJava) + .keepAlive(Time.of(t => t.time(keepAlive))) + .build() + + val response = apply().openPointInTime(openPitRequest) + val pitId = response.id() + + if (pitId == null || pitId.isEmpty) { + throw new IllegalStateException("PIT ID is null or empty in response") + } + + logger.info(s"PIT opened successfully: ${pitId.take(20)}... (keepAlive: $keepAlive)") + pitId + }.recoverWith { case ex: Exception => + logger.error(s"Failed to open PIT: ${ex.getMessage}", ex) + Future.failed( + new IOException(s"Failed to open PIT for indices: ${indices.mkString(", ")}", ex) + ) + } + } + + /** Close a Point In Time + */ + private def closePit(pitId: String): Unit = { + Try { + logger.debug(s"Closing PIT: ${pitId.take(20)}...") + + val closePitRequest = new ClosePointInTimeRequest.Builder() + .id(pitId) + .build() + + val response = apply().closePointInTime(closePitRequest) + + if (response.succeeded()) { + logger.info(s"PIT closed successfully: ${pitId.take(20)}...") + } else { + logger.warn(s"PIT close reported failure: ${pitId.take(20)}...") + } + }.recover { case ex: Exception => + logger.warn(s"Failed to close PIT ${pitId.take(20)}...: ${ex.getMessage}") + } + } + + /** Extract ALL results: hits + aggregations This is crucial for queries with aggregations (GROUP + * BY, COUNT, AVG, etc.) + */ + private def extractAllResults( + response: Either[SearchResponse[JMap[String, Object]], ScrollResponse[JMap[String, Object]]], + fieldAliases: Map[String, String], + aggregations: Map[String, SQLAggregation] + ): Seq[Map[String, Any]] = { + val jsonString = + response match { + case Left(l) => convertToJson(l) + case Right(r) => convertToJson(r) + } + val sqlResponse = + ElasticResponse("", jsonString, fieldAliases, aggregations.map(kv => kv._1 -> kv._2)) + + parseResponse(sqlResponse) match { + case Success(rows) => + logger.debug(s"Parsed ${rows.size} rows from response (hits + aggregations)") + rows + case Failure(ex) => + logger.error(s"Failed to parse scroll response: ${ex.getMessage}", ex) + Seq.empty + } + } + + /** Extract ONLY hits (for search_after optimization) Ignores aggregations for better performance + */ + private def extractHitsOnly( + response: SearchResponse[JMap[String, Object]], + fieldAliases: Map[String, String] + ): Seq[Map[String, Any]] = { + val jsonString = convertToJson(response) + val sqlResponse = ElasticResponse("", jsonString, fieldAliases, Map.empty) + + parseResponse(sqlResponse) match { + case Success(rows) => + logger.debug(s"Parsed ${rows.size} hits from response") + rows + case Failure(ex) => + logger.error(s"Failed to parse search after response: ${ex.getMessage}", ex) + Seq.empty + } + } + + /** Clear scroll context to free resources + */ + private def clearScroll(scrollId: String): Unit = { + Try { + logger.debug(s"Clearing scroll: $scrollId") + val clearRequest = new ClearScrollRequest.Builder() + .scrollId(scrollId) + .build() + apply().clearScroll(clearRequest) + }.recover { case ex: Exception => + logger.warn(s"Failed to clear scroll $scrollId: ${ex.getMessage}") + } + } +} diff --git 
a/es8/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientCompanion.scala b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientCompanion.scala similarity index 53% rename from es8/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientCompanion.scala rename to es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientCompanion.scala index 0fcd0123..0d733502 100644 --- a/es8/java/src/main/scala/app/softnetwork/elastic/client/java/ElasticsearchClientCompanion.scala +++ b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientCompanion.scala @@ -16,13 +16,12 @@ package app.softnetwork.elastic.client.java -import app.softnetwork.elastic.client.ElasticConfig +import app.softnetwork.elastic.client.ElasticClientCompanion import co.elastic.clients.elasticsearch.{ElasticsearchAsyncClient, ElasticsearchClient} import co.elastic.clients.json.jackson.JacksonJsonpMapper import co.elastic.clients.transport.rest_client.RestClientTransport import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.ClassTagExtensions -import org.apache.http.HttpHost import org.apache.http.auth.{AuthScope, UsernamePasswordCredentials} import org.apache.http.impl.client.BasicCredentialsProvider import org.apache.http.impl.nio.client.HttpAsyncClientBuilder @@ -32,19 +31,51 @@ import org.slf4j.{Logger, LoggerFactory} import java.util.concurrent.CompletableFuture import scala.concurrent.{Future, Promise} -trait ElasticsearchClientCompanion { +trait JavaClientCompanion extends ElasticClientCompanion[ElasticsearchClient] { val logger: Logger = LoggerFactory getLogger getClass.getName - def elasticConfig: ElasticConfig + @volatile private var asyncClient: Option[ElasticsearchAsyncClient] = None - private var client: Option[ElasticsearchClient] = None - - private var asyncClient: Option[ElasticsearchAsyncClient] = None + /** Lock object for synchronized initialization + */ + private val lock = new Object() lazy val mapper: ObjectMapper with ClassTagExtensions = new ObjectMapper() with ClassTagExtensions - def transport: RestClientTransport = { + def async(): ElasticsearchAsyncClient = { + // First check (no locking) - fast path for already initialized client + asyncClient match { + case Some(c) => c + case None => + // Second check with lock - slow path for initialization + lock.synchronized { + asyncClient match { + case Some(c) => + c // Another thread initialized while we were waiting + case None => + val c = createAsyncClient() + asyncClient = Some(c) + logger.info( + s"Elasticsearch async Client initialized for ${elasticConfig.credentials.url}" + ) + c + } + } + } + } + + private def createAsyncClient(): ElasticsearchAsyncClient = { + try { + new ElasticsearchAsyncClient(buildTransport()) + } catch { + case ex: Exception => + logger.error(s"Failed to create ElasticsearchAsyncClient: ${ex.getMessage}", ex) + throw new IllegalStateException("Cannot create Elasticsearch async client", ex) + } + } + + private def buildTransport(): RestClientTransport = { val credentialsProvider = new BasicCredentialsProvider() if (elasticConfig.credentials.username.nonEmpty) { credentialsProvider.setCredentials( @@ -57,7 +88,7 @@ trait ElasticsearchClientCompanion { } val restClientBuilder: RestClientBuilder = RestClient .builder( - HttpHost.create(elasticConfig.credentials.url) + parseHttpHost(elasticConfig.credentials.url) ) .setHttpClientConfigCallback((httpAsyncClientBuilder: HttpAsyncClientBuilder) => 
httpAsyncClientBuilder.setDefaultCredentialsProvider(credentialsProvider) @@ -65,23 +96,34 @@ trait ElasticsearchClientCompanion { new RestClientTransport(restClientBuilder.build(), new JacksonJsonpMapper()) } - def apply(): ElasticsearchClient = { - client match { - case Some(c) => c - case _ => - val c = new ElasticsearchClient(transport) - client = Some(c) - c + /** Create and configure Elasticsearch Client + */ + override protected def createClient(): ElasticsearchClient = { + try { + new ElasticsearchClient(buildTransport()) + } catch { + case ex: Exception => + logger.error(s"Failed to create ElasticsearchClient: ${ex.getMessage}", ex) + throw new IllegalStateException("Cannot create Elasticsearch client", ex) } } - def async(): ElasticsearchAsyncClient = { - asyncClient match { - case Some(c) => c - case _ => - val c = new ElasticsearchAsyncClient(transport) - asyncClient = Some(c) - c + /** Test connection to Elasticsearch cluster + * + * @return + * true if connection is successful + */ + override def testConnection(): Boolean = { + try { + val c = apply() + val response = c.info() + logger.info(s"Connected to Elasticsearch ${response.version().number()}") + true + } catch { + case ex: Exception => + logger.error(s"Failed to connect to Elasticsearch: ${ex.getMessage}", ex) + incrementFailures() + false } } diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientConversion.scala b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientConversion.scala new file mode 100644 index 00000000..20729e6b --- /dev/null +++ b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientConversion.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package app.softnetwork.elastic.client.java + +import co.elastic.clients.json.JsonpSerializable +import co.elastic.clients.json.jackson.JacksonJsonpMapper + +import java.io.{IOException, StringWriter} +import scala.util.Try + +trait JavaClientConversion { _: JavaClientCompanion => + private[this] val jsonpMapper = new JacksonJsonpMapper(mapper) + + /** Convert any Elasticsearch response to JSON string */ + protected def convertToJson[T <: JsonpSerializable](response: T): String = { + val stringWriter = new StringWriter() + val generator = jsonpMapper.jsonProvider().createGenerator(stringWriter) + try { + response.serialize(generator, jsonpMapper) + generator.flush() + stringWriter.toString + } catch { + case ex: Exception => + logger.error(s"Failed to convert response to JSON: ${ex.getMessage}", ex) + throw new IOException("Failed to serialize Elasticsearch response", ex) + } finally { + Try(generator.close()).failed.foreach { ex => + logger.warn(s"Failed to close JSON generator: ${ex.getMessage}") + } + } + } +} diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientHelpers.scala b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientHelpers.scala new file mode 100644 index 00000000..940aa01e --- /dev/null +++ b/es9/java/src/main/scala/app/softnetwork/elastic/client/java/JavaClientHelpers.scala @@ -0,0 +1,301 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.java + +import app.softnetwork.elastic.client.ElasticClientHelpers +import app.softnetwork.elastic.client.result.{ElasticError, ElasticResult} + +import scala.util.{Failure, Success, Try} + +trait JavaClientHelpers extends ElasticClientHelpers with JavaClientConversion { + _: JavaClientCompanion => + + // ======================================================================== + // GENERIC METHODS FOR EXECUTING JAVA CLIENT ACTIONS + // ======================================================================== + + //format:off + /** Execute a Java Client action with a generic transformation of the result. 
+ * + * @tparam Resp + * type of the response + * @tparam T + * type of the desired final result + * @param operation + * name of the operation (for logging and error context) + * @param index + * relevant index (optional, for logging) + * @param retryable + * true if the operation can be retried in case of a transient error + * @param action + * function executing the action and returning the response + * @param transformer + * function transforming the response into T + * @return + * ElasticResult[T] + * + * @example + * {{{ + * executeJavaAction[CreateIndexResponse, Boolean]( + * operation = "createIndex", + * index = Some("my-index"), + * retryable = false + * )( + * action = apply().indices().create(builder => builder.index("my-index")) + * )( + * transformer = resp => resp.acknowledged() + * ) + * }}} + */ + //format:on + private[client] def executeJavaAction[Resp, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Resp + )( + transformer: Resp => T + ): ElasticResult[T] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + logger.debug(s"Executing operation '$operation'$indexStr") + + // ✅ Execution with exception handling + val tryResult: Try[Resp] = Try { + action + } + + // ✅ Conversion to ElasticResult[Resp] + val elasticResult: ElasticResult[Resp] = tryResult match { + case Success(result) => + ElasticResult.success(result) + case Failure(ex: co.elastic.clients.elasticsearch._types.ElasticsearchException) => + // Extract error details from Elasticsearch exception + val statusCode = Option(ex.status()).map(_.intValue()) + val errorType = Option(ex.error()).flatMap(e => Option(e.`type`())) + val reason = Option(ex.error()).flatMap(e => Option(e.reason())) + + val message = + s"Elasticsearch error during $operation: ${errorType.getOrElse("unknown")} - ${reason + .getOrElse(ex.getMessage)}" + logger.error(s"$message$indexStr", ex) + + ElasticResult.failure( + ElasticError( + message = message, + cause = Some(ex), + statusCode = statusCode, + operation = Some(operation) + ) + ) + case Failure(ex: java.io.IOException) => + logger.error(s"IO exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + ElasticResult.failure( + ElasticError( + message = s"IO error during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + case Failure(ex) => + logger.error(s"Exception during operation '$operation'$indexStr: ${ex.getMessage}", ex) + ElasticResult.failure( + ElasticError( + message = s"Exception during $operation: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + + // ✅ Apply transformation + elasticResult.flatMap { result => + Try(transformer(result)) match { + case Success(transformed) => + logger.debug(s"Operation '$operation'$indexStr succeeded") + ElasticResult.success(transformed) + case Failure(ex) => + logger.error(s"Transformation failed for operation '$operation'$indexStr", ex) + ElasticResult.failure( + ElasticError( + message = s"Failed to transform result: ${ex.getMessage}", + cause = Some(ex), + statusCode = None, + operation = Some(operation) + ) + ) + } + } + } + + /** Simplified variant for operations returning Boolean values (acknowledged). 
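+ * A minimal sketch, mirroring the deleteIndex call implemented earlier in this diff
+ * (the index name "my-index" is illustrative):
+ * {{{
+ * executeJavaBooleanAction(
+ *   operation = "deleteIndex",
+ *   index = Some("my-index"),
+ *   retryable = false
+ * )(
+ *   apply().indices().delete(new DeleteIndexRequest.Builder().index("my-index").build())
+ * )(_.acknowledged())
+ * }}}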
 + * + * @param operation + * name of the operation + * @param index + * index concerned (optional) + * @param retryable + * true if retryable + * @param action + * function executing the action + * @param acknowledgedExtractor + * function to extract the acknowledged status + * @return + * ElasticResult[Boolean] + */ + private[client] def executeJavaBooleanAction[Resp]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Resp + )( + acknowledgedExtractor: Resp => Boolean + ): ElasticResult[Boolean] = { + executeJavaAction[Resp, Boolean](operation, index, retryable)(action)(acknowledgedExtractor) + } + + //format:off + /** Variant to execute an action and extract a specific field from the response. + * + * @tparam Resp + * type of the response + * @tparam T + * type of the final result + * @param operation + * name of the operation + * @param index + * index concerned (optional) + * @param retryable + * true if retryable + * @param action + * function executing the action + * @param extractor + * function extracting T from the response + * @return + * ElasticResult[T] + * + * @example + * {{{ + * executeJavaWithExtractor[CountResponse, Long]( + * operation = "countDocuments", + * index = Some("my-index") + * )( + * action = apply().count(builder => builder.index("my-index")) + * )( + * extractor = resp => resp.count() + * ) + * }}} + */ + //format:on + private[client] def executeJavaWithExtractor[Resp, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Resp + )( + extractor: Resp => T + ): ElasticResult[T] = { + executeJavaAction[Resp, T](operation, index, retryable)(action)(extractor) + } + + //format:off + /** Asynchronous variant to execute a Java Client action. Note: this helper wraps the + * synchronous call in a Future; the native ElasticsearchAsyncClient remains available via async(). + * + * @tparam Resp + * type of the response + * @tparam T + * type of the desired final result + * @param operation + * name of the operation + * @param index + * relevant index (optional) + * @param retryable + * true if retryable + * @param action + * function executing the action + * @param transformer + * function transforming the response into T + * @return + * Future[ElasticResult[T]] + * + * @example + * {{{ + * executeAsyncJavaAction[IndexResponse, String]( + * operation = "indexDocument", + * index = Some("my-index") + * )( + * action = apply().index(builder => builder.index("my-index").document(doc)) + * )( + * transformer = resp => resp.id() + * ) + * }}} + */ + //format:on + private[client] def executeAsyncJavaAction[Resp, T]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Resp + )( + transformer: Resp => T + )(implicit ec: scala.concurrent.ExecutionContext): scala.concurrent.Future[ElasticResult[T]] = { + val indexStr = index.map(i => s" on index '$i'").getOrElse("") + logger.debug(s"Executing operation '$operation'$indexStr asynchronously") + + scala.concurrent.Future { + executeJavaAction[Resp, T](operation, index, retryable)(action)(transformer) + } + } + + /** Simplified asynchronous variant for operations returning Boolean values.
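+ * A minimal sketch, reusing the refresh call from this diff ("my-index" is illustrative);
+ * the call yields a Future[ElasticResult[Boolean]] resolved on the implicit ExecutionContext:
+ * {{{
+ * executeAsyncJavaBooleanAction(
+ *   operation = "refresh",
+ *   index = Some("my-index")
+ * )(
+ *   apply().indices().refresh(new RefreshRequest.Builder().index("my-index").build())
+ * )(_.shards().failed().intValue() == 0)
+ * }}}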
+ * + * @param operation + * name of the operation + * @param index + * index concerned (optional) + * @param retryable + * true if retryable + * @param action + * function executing the action + * @param acknowledgedExtractor + * function to extract the acknowledged status + * @return + * Future of ElasticResult[Boolean] + */ + private[client] def executeAsyncJavaBooleanAction[Resp]( + operation: String, + index: Option[String] = None, + retryable: Boolean = true + )( + action: => Resp + )( + acknowledgedExtractor: Resp => Boolean + )(implicit + ec: scala.concurrent.ExecutionContext + ): scala.concurrent.Future[ElasticResult[Boolean]] = { + executeAsyncJavaAction[Resp, Boolean](operation, index, retryable)(action)( + acknowledgedExtractor + ) + } + +} diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/client/spi/JavaClientSpi.scala b/es9/java/src/main/scala/app/softnetwork/elastic/client/spi/JavaClientSpi.scala new file mode 100644 index 00000000..9b1657b4 --- /dev/null +++ b/es9/java/src/main/scala/app/softnetwork/elastic/client/spi/JavaClientSpi.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2025 SOFTNETWORK + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package app.softnetwork.elastic.client.spi + +import app.softnetwork.elastic.client.ElasticClientApi +import app.softnetwork.elastic.client.java.JavaClientApi +import com.typesafe.config.Config + +class JavaClientSpi extends ElasticClientSpi { + + //format:off + /** Creates an Elasticsearch client instance. + * + * @param conf + * Typesafe configuration containing Elasticsearch parameters + * @return + * Configured ElasticClientApi instance + * @example + * {{{ + * class MyElasticClientProvider extends ElasticClientSpi { + * override def client(config: Config): ElasticClientApi = { + * new MyElasticClientImpl(config) + * } + * } + * }}} + */ + //format:on + override def client(conf: Config): ElasticClientApi = + new JavaClientApi { + override def config: Config = conf + } +} diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticsearchClientProvider.scala b/es9/java/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticsearchClientProvider.scala deleted file mode 100644 index 33b2bd05..00000000 --- a/es9/java/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticsearchClientProvider.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2025 SOFTNETWORK - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package app.softnetwork.elastic.persistence.query - -import app.softnetwork.elastic.client.java.ElasticsearchClientApi -import app.softnetwork.persistence.ManifestWrapper -import app.softnetwork.persistence.model.Timestamped - -trait ElasticsearchClientProvider[T <: Timestamped] - extends ElasticProvider[T] - with ElasticsearchClientApi { - _: ManifestWrapper[T] => - -} diff --git a/es9/java/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithJavaProvider.scala b/es9/java/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithJavaProvider.scala deleted file mode 100644 index 2e2add10..00000000 --- a/es9/java/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStreamWithJavaProvider.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2025 SOFTNETWORK - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package app.softnetwork.elastic.persistence.query - -import app.softnetwork.persistence.message.CrudEvent -import app.softnetwork.persistence.model.Timestamped -import app.softnetwork.persistence.query.{JournalProvider, OffsetProvider} - -trait State2ElasticProcessorStreamWithJavaProvider[T <: Timestamped, E <: CrudEvent] - extends State2ElasticProcessorStream[T, E] - with ElasticsearchClientProvider[T] { _: JournalProvider with OffsetProvider => } diff --git a/es9/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchClientSpec.scala b/es9/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchClientSpec.scala deleted file mode 100644 index e0165b77..00000000 --- a/es9/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchClientSpec.scala +++ /dev/null @@ -1,27 +0,0 @@ -package app.softnetwork.elastic.client - -import app.softnetwork.elastic.client.ElasticsearchProviders.{ - BinaryProvider, - ParentProvider, - PersonProvider, - SampleProvider -} -import app.softnetwork.elastic.model.{Binary, Parent, Sample} -import app.softnetwork.elastic.persistence.query.ElasticProvider -import app.softnetwork.persistence.person.model.Person - -class ElasticsearchClientSpec extends ElasticClientSpec { - - lazy val pClient: ElasticProvider[Person] with ElasticClientApi = new PersonProvider( - elasticConfig - ) - lazy val sClient: ElasticProvider[Sample] with ElasticClientApi = new SampleProvider( - elasticConfig - ) - lazy val bClient: ElasticProvider[Binary] with ElasticClientApi = new BinaryProvider( - elasticConfig - ) - lazy val parentClient: ElasticProvider[Parent] with ElasticClientApi = new ParentProvider( - elasticConfig - ) -} diff --git a/es9/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchProviders.scala b/es9/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchProviders.scala deleted file mode 100644 index 754a3416..00000000 --- a/es9/java/src/test/scala/app/softnetwork/elastic/client/ElasticsearchProviders.scala +++ /dev/null @@ -1,51 +0,0 @@ -package app.softnetwork.elastic.client - -import 
app.softnetwork.elastic.model.{Binary, Parent, Sample}
-import app.softnetwork.elastic.persistence.query.ElasticsearchClientProvider
-import app.softnetwork.persistence.ManifestWrapper
-import app.softnetwork.persistence.person.model.Person
-import co.elastic.clients.elasticsearch.ElasticsearchClient
-import com.typesafe.config.Config
-
-object ElasticsearchProviders {
-
-  class PersonProvider(es: Config)
-      extends ElasticsearchClientProvider[Person]
-      with ManifestWrapper[Person] {
-    override protected val manifestWrapper: ManifestW = ManifestW()
-
-    override lazy val config: Config = es
-
-    implicit lazy val elasticsearchClient: ElasticsearchClient = apply()
-  }
-
-  class SampleProvider(es: Config)
-      extends ElasticsearchClientProvider[Sample]
-      with ManifestWrapper[Sample] {
-    override protected val manifestWrapper: ManifestW = ManifestW()
-
-    override lazy val config: Config = es
-
-    implicit lazy val elasticsearchClient: ElasticsearchClient = apply()
-  }
-
-  class BinaryProvider(es: Config)
-      extends ElasticsearchClientProvider[Binary]
-      with ManifestWrapper[Binary] {
-    override protected val manifestWrapper: ManifestW = ManifestW()
-
-    override lazy val config: Config = es
-
-    implicit lazy val elasticsearchClient: ElasticsearchClient = apply()
-  }
-
-  class ParentProvider(es: Config)
-      extends ElasticsearchClientProvider[Parent]
-      with ManifestWrapper[Parent] {
-    override protected val manifestWrapper: ManifestW = ManifestW()
-
-    override lazy val config: Config = es
-
-    implicit lazy val elasticsearchClient: ElasticsearchClient = apply()
-  }
-}
diff --git a/es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientCompanionSpec.scala b/es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientCompanionSpec.scala
new file mode 100644
index 00000000..6d88d236
--- /dev/null
+++ b/es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientCompanionSpec.scala
@@ -0,0 +1,122 @@
+package app.softnetwork.elastic.client
+
+import akka.actor.ActorSystem
+import app.softnetwork.elastic.client.java.JavaClientCompanion
+import app.softnetwork.elastic.scalatest.ElasticDockerTestKit
+import app.softnetwork.persistence.generateUUID
+import com.typesafe.config.ConfigFactory
+import configs.ConfigReader
+import org.scalatest.concurrent.ScalaFutures
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
+import org.slf4j.{Logger, LoggerFactory}
+
+import _root_.java.util.concurrent.TimeUnit
+import scala.concurrent.duration.Duration
+import scala.concurrent.{Await, ExecutionContextExecutor, Future}
+import scala.util.Try
+
+class JavaClientCompanionSpec
+    extends AnyWordSpec
+    with ElasticDockerTestKit
+    with Matchers
+    with ScalaFutures {
+
+  lazy val log: Logger = LoggerFactory getLogger getClass.getName
+
+  implicit val system: ActorSystem = ActorSystem(generateUUID())
+
+  implicit val executionContext: ExecutionContextExecutor = system.dispatcher
+
+  override def afterAll(): Unit = {
+    Await.result(system.terminate(), Duration(30, TimeUnit.SECONDS))
+    super.afterAll()
+  }
+
+  "JavaClientCompanion" should {
+
+    "initialize client lazily" in {
+      val companion = TestCompanion()
+      companion.isInitialized shouldBe false
+
+      val client = companion.apply()
+      client should not be null
+      companion.isInitialized shouldBe true
+    }
+
+    "return same instance on multiple calls" in {
+      val companion = TestCompanion()
+      val client1 = companion.apply()
+      val client2 = companion.apply()
+
+      client1 should be theSameInstanceAs client2
+    }
+
+    "be thread-safe during initialization" in {
+      val companion = TestCompanion()
+      val futures = (1 to 100).map { _ =>
+        Future {
+          companion.apply()
+        }
+      }
+
+      val clients = Future.sequence(futures).futureValue
+
+      // All clients must be the same instance
+      clients.distinct.size shouldBe 1
+    }
+
+    "close client properly" in {
+      val companion = TestCompanion()
+      companion.apply()
+      companion.isInitialized shouldBe true
+
+      companion.close()
+      companion.isInitialized shouldBe false
+    }
+
+    "handle invalid URL gracefully" in {
+      val companion = TestCompanion("invalid-url")
+
+      Try(an[IllegalArgumentException] should be thrownBy {
+        companion.apply()
+      })
+    }
+
+    "test connection successfully" in {
+      val companion = TestCompanion()
+      companion.testConnection() shouldBe true
+    }
+  }
+
+  case class TestCompanion(config: ElasticConfig) extends JavaClientCompanion {
+    override def elasticConfig: ElasticConfig = config
+  }
+
+  object TestCompanion {
+    def apply(): TestCompanion = TestCompanion(
+      ConfigReader[ElasticConfig]
+        .read(elasticConfig.withFallback(ConfigFactory.load("softnetwork-elastic.conf")), "elastic")
+        .toEither match {
+        case Left(configError) =>
+          throw configError.configException
+        case Right(r) => r
+      }
+    )
+
+    def apply(url: String): TestCompanion = TestCompanion(
+      ConfigReader[ElasticConfig]
+        .read(
+          ConfigFactory
+            .parseString(elasticConfigAsString)
+            .withFallback(ConfigFactory.load("softnetwork-elastic.conf")),
+          "elastic"
+        )
+        .toEither match {
+        case Left(configError) =>
+          throw configError.configException
+        case Right(r) => r.copy(credentials = ElasticCredentials(url))
+      }
+    )
+  }
+}
diff --git a/es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientSpec.scala b/es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientSpec.scala
new file mode 100644
index 00000000..cf255fb5
--- /dev/null
+++ b/es9/java/src/test/scala/app/softnetwork/elastic/client/JavaClientSpec.scala
@@ -0,0 +1,3 @@
+package app.softnetwork.elastic.client
+
+class JavaClientSpec extends ElasticClientSpec
diff --git a/es9/java/src/test/scala/app/softnetwork/elastic/persistence/person/JavaClientPersonHandlerSpec.scala b/es9/java/src/test/scala/app/softnetwork/elastic/persistence/person/JavaClientPersonHandlerSpec.scala
new file mode 100644
index 00000000..91651d91
--- /dev/null
+++ b/es9/java/src/test/scala/app/softnetwork/elastic/persistence/person/JavaClientPersonHandlerSpec.scala
@@ -0,0 +1,3 @@
+package app.softnetwork.elastic.persistence.person
+
+class JavaClientPersonHandlerSpec extends ElasticClientPersonHandlerSpec
diff --git a/persistence/build.sbt b/persistence/build.sbt
new file mode 100644
index 00000000..45f40ee4
--- /dev/null
+++ b/persistence/build.sbt
@@ -0,0 +1,9 @@
+import SoftClient4es.*
+
+organization := "app.softnetwork.elastic"
+
+name := "softclient4es-persistence"
+
+libraryDependencies ++= Seq(
+  "app.softnetwork.persistence" %% "persistence-core" % Versions.genericPersistence excludeAll (jacksonExclusions *)
+)
\ No newline at end of file
diff --git a/core/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticProvider.scala b/persistence/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticProvider.scala
similarity index 63%
rename from core/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticProvider.scala
rename to persistence/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticProvider.scala
index cdeed9e8..8daeccba 100644
---
a/core/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticProvider.scala +++ b/persistence/src/main/scala/app/softnetwork/elastic/persistence/query/ElasticProvider.scala @@ -16,7 +16,9 @@ package app.softnetwork.elastic.persistence.query -import app.softnetwork.elastic.client.ElasticClientApi +import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticSuccess} +import app.softnetwork.elastic.client.spi.ElasticClientFactory +import app.softnetwork.elastic.client.{ElasticClientApi, ElasticClientDelegator} import app.softnetwork.elastic.sql.query.SQLQuery import mustache.Mustache import org.json4s.Formats @@ -25,14 +27,21 @@ import app.softnetwork.persistence.model.Timestamped import app.softnetwork.persistence.query.ExternalPersistenceProvider import app.softnetwork.serialization.commonFormats import app.softnetwork.elastic.persistence.typed.Elastic._ +import org.slf4j.Logger import scala.reflect.ClassTag import scala.util.{Failure, Success, Try} /** Created by smanciot on 16/05/2020. */ -trait ElasticProvider[T <: Timestamped] extends ExternalPersistenceProvider[T] { - _: ElasticClientApi with ManifestWrapper[T] => +trait ElasticProvider[T <: Timestamped] + extends ExternalPersistenceProvider[T] + with ElasticClientDelegator { + self: ManifestWrapper[T] => + + lazy val delegate: ElasticClientApi = ElasticClientFactory.create(self.config) + + protected def logger: Logger implicit def formats: Formats = commonFormats @@ -56,13 +65,18 @@ trait ElasticProvider[T <: Timestamped] extends ExternalPersistenceProvider[T] { } protected def initIndex(): Unit = { - Try { - updateMapping(index, loadMapping(mappingPath)) - addAlias(index, alias) - } match { - case Success(_) => logger.info(s"index:$index type:${_type} alias:$alias created") - case Failure(f) => - logger.error(s"!!!!! index:$index type:${_type} alias:$alias -> ${f.getMessage}", f) + updateMapping(index, loadMapping(mappingPath)) match { + case ElasticSuccess(_) => + logger.info(s"index:$index type:${_type} mapping updated") + addAlias(index, alias) match { + case ElasticSuccess(_) => logger.info(s"index:$index type:${_type} alias:$alias created") + case ElasticFailure(elasticError) => + logger.error( + s"!!!!! index:$index type:${_type} alias:$alias -> ${elasticError.message}" + ) + } + case ElasticFailure(elasticError) => + logger.error(s"!!!!! 
index:$index type:${_type} mapping update -> ${elasticError.message}")
     }
   }
@@ -78,10 +92,10 @@ trait ElasticProvider[T <: Timestamped] extends ExternalPersistenceProvider[T] {
     * whether the operation is successful or not
     */
   override def createDocument(document: T)(implicit t: ClassTag[T]): Boolean = {
-    Try(index(document, Some(index), Some(_type))) match {
-      case Success(_) => refresh(index)
-      case Failure(f) =>
-        logger.error(f.getMessage, f)
+    indexAs(document, document.uuid, Some(index), Some(_type)) match {
+      case ElasticSuccess(_) => true
+      case ElasticFailure(elasticError) =>
+        logger.error(elasticError.message)
         false
     }
   }
@@ -99,10 +113,10 @@ trait ElasticProvider[T <: Timestamped] extends ExternalPersistenceProvider[T] {
     * whether the operation is successful or not
     */
   override def updateDocument(document: T, upsert: Boolean)(implicit t: ClassTag[T]): Boolean = {
-    Try(update(document, Some(index), Some(_type), upsert)) match {
-      case Success(_) => refresh(index)
-      case Failure(f) =>
-        logger.error(f.getMessage, f)
+    updateAs(document, document.uuid, Some(index), Some(_type), upsert) match {
+      case ElasticSuccess(_) => true
+      case ElasticFailure(elasticError) =>
+        logger.error(elasticError.message)
         false
     }
   }
@@ -115,12 +129,10 @@ trait ElasticProvider[T <: Timestamped] extends ExternalPersistenceProvider[T] {
     * whether the operation is successful or not
     */
   override def deleteDocument(uuid: String): Boolean = {
-    Try(
-      delete(uuid, index)
-    ) match {
-      case Success(value) => value && refresh(index)
-      case Failure(f) =>
-        logger.error(f.getMessage, f)
+    delete(uuid, index) match {
+      case ElasticSuccess(value) => value
+      case ElasticFailure(elasticError) =>
+        logger.error(elasticError.message)
         false
     }
   }
@@ -136,17 +148,15 @@ trait ElasticProvider[T <: Timestamped] extends ExternalPersistenceProvider[T] {
     */
   override def upsertDocument(uuid: String, data: String): Boolean = {
     logger.debug(s"Upserting document $uuid for index $index with $data")
-    Try(
-      update(
-        index,
-        uuid,
-        data,
-        upsert = true
-      )
+    update(
+      index,
+      uuid,
+      data,
+      upsert = true
     ) match {
-      case Success(_) => refresh(index)
-      case Failure(f) =>
-        logger.error(f.getMessage, f)
+      case ElasticSuccess(_) => true
+      case ElasticFailure(elasticError) =>
+        logger.error(s"upsertDocument failed -> ${elasticError.message}")
         false
     }
   }
@@ -159,10 +169,10 @@ trait ElasticProvider[T <: Timestamped] extends ExternalPersistenceProvider[T] {
    * the document retrieved, None otherwise
    */
  override def loadDocument(uuid: String)(implicit m: Manifest[T], formats: Formats): Option[T] = {
-    Try(get(uuid, Some(index), Some(_type))) match {
-      case Success(s) => s
-      case Failure(f) =>
-        logger.error(f.getMessage, f)
+    getAs(uuid, Some(index), Some(_type)) match {
+      case ElasticSuccess(result) => result
+      case ElasticFailure(elasticError) =>
+        logger.error(s"loadDocument failed -> ${elasticError.message}")
        None
    }
  }
@@ -177,11 +187,11 @@ trait ElasticProvider[T <: Timestamped] extends ExternalPersistenceProvider[T] {
   override def searchDocuments(
     query: String
   )(implicit m: Manifest[T], formats: Formats): List[T] = {
-    Try(search(SQLQuery(query))) match {
-      case Success(s) => s
-      case Failure(f) =>
-        logger.error(f.getMessage, f)
-        List.empty
+    searchAs[T](SQLQuery(query)) match {
+      case ElasticSuccess(results) => results.toList
+      case ElasticFailure(elasticError) =>
+        logger.error(s"searchDocuments failed -> ${elasticError.message}")
+        List.empty[T]
     }
   }
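The hunks above replace `Try`/`Success`/`Failure` plumbing with the client's `ElasticResult` type. A minimal sketch of the consuming pattern, under the assumption that `ElasticResult[T]` is a sealed type with `ElasticSuccess`/`ElasticFailure` cases whose error carries a `message` (the helper name is hypothetical):

```scala
import app.softnetwork.elastic.client.result.{ElasticFailure, ElasticResult, ElasticSuccess}
import org.slf4j.{Logger, LoggerFactory}

object ElasticResultOps {
  private val logger: Logger = LoggerFactory.getLogger(getClass)

  // Hypothetical helper: collapse any ElasticResult into a Boolean, logging
  // failures the same way createDocument/updateDocument/deleteDocument now do.
  def succeeded[T](operation: String)(result: ElasticResult[T]): Boolean =
    result match {
      case ElasticSuccess(_) => true
      case ElasticFailure(error) =>
        logger.error(s"$operation failed -> ${error.message}")
        false
    }
}
```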
diff --git a/core/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStream.scala b/persistence/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStream.scala
similarity index 100%
rename from core/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStream.scala
rename to persistence/src/main/scala/app/softnetwork/elastic/persistence/query/State2ElasticProcessorStream.scala
diff --git a/core/src/main/scala/app/softnetwork/elastic/persistence/typed/Elastic.scala b/persistence/src/main/scala/app/softnetwork/elastic/persistence/typed/Elastic.scala
similarity index 96%
rename from core/src/main/scala/app/softnetwork/elastic/persistence/typed/Elastic.scala
rename to persistence/src/main/scala/app/softnetwork/elastic/persistence/typed/Elastic.scala
index 8f4cdc67..e5edbf5a 100644
--- a/core/src/main/scala/app/softnetwork/elastic/persistence/typed/Elastic.scala
+++ b/persistence/src/main/scala/app/softnetwork/elastic/persistence/typed/Elastic.scala
@@ -17,13 +17,10 @@
 package app.softnetwork.elastic.persistence.typed
 
 import app.softnetwork.persistence._
-
 import app.softnetwork.persistence.model.Timestamped
 
 import scala.language.implicitConversions
 
-import app.softnetwork.persistence._
-
 /** Created by smanciot on 10/04/2020.
   */
 object Elastic {
diff --git a/project/Versions.scala b/project/Versions.scala
index 3bad99e7..336b9e75 100644
--- a/project/Versions.scala
+++ b/project/Versions.scala
@@ -1,10 +1,10 @@
 object Versions {
 
-  // val akka = "2.6.20" // TODO 2.6.20 -> 2.8.3
+  val akka = "2.6.20" // TODO 2.6.20 -> 2.8.3
 
   val scalatest = "3.2.19"
 
-  val typesafeConfig = "1.4.2"
+  val typesafeConfig = "1.4.3"
 
   val kxbmap_scala2_12 = "0.4.4"
diff --git a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala
index 01160dc6..5366c80d 100644
--- a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala
+++ b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticAggregation.scala
@@ -237,7 +237,6 @@ object ElasticAggregation {
     aggregationsDirection: Map[String, SortOrder],
     having: Option[Criteria]
   ): Option[TermsAggregation] = {
-    Console.println(bucketsDirection)
     buckets.reverse.foldLeft(Option.empty[TermsAggregation]) { (current, bucket) =>
       var agg = {
         bucketsDirection.get(bucket.identifier.identifierName) match {
diff --git a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticBridge.scala
similarity index 92%
rename from sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala
rename to sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticBridge.scala
index 300b7376..27908eb8 100644
--- a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticQuery.scala
+++ b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/ElasticBridge.scala
@@ -25,7 +25,7 @@ import com.sksamuel.elastic4s.requests.searches.queries.{InnerHit, Query}
 
 import scala.annotation.tailrec
 
-case class ElasticQuery(filter: ElasticFilter) {
+case class ElasticBridge(filter: ElasticFilter) {
   def query(
     innerHitsNames: Set[String] = Set.empty,
     currentQuery: Option[ElasticBoolQuery]
@@ -34,9 +34,9 @@
       case boolQuery: ElasticBoolQuery =>
         import boolQuery._
         bool(
mustFilters.map(implicitly[ElasticQuery](_).query(innerHitsNames, currentQuery)), - shouldFilters.map(implicitly[ElasticQuery](_).query(innerHitsNames, currentQuery)), - notFilters.map(implicitly[ElasticQuery](_).query(innerHitsNames, currentQuery)) + mustFilters.map(implicitly[ElasticBridge](_).query(innerHitsNames, currentQuery)), + shouldFilters.map(implicitly[ElasticBridge](_).query(innerHitsNames, currentQuery)), + notFilters.map(implicitly[ElasticBridge](_).query(innerHitsNames, currentQuery)) ) .filter(innerFilters.map(_.query(innerHitsNames, currentQuery))) case nested: ElasticNested => @@ -104,12 +104,12 @@ case class ElasticQuery(filter: ElasticFilter) { case p: Predicate if nestedTrees.size > 1 => val leftNested = ElasticNested(p.leftCriteria, p.leftCriteria.limit) val leftBoolQuery = Option(ElasticBoolQuery(group = true)) - val leftQuery = ElasticQuery(leftNested) + val leftQuery = ElasticBridge(leftNested) .query(innerHitsNames /*++ leftNested.innerHitsName.toSet*/, leftBoolQuery) val rightNested = ElasticNested(p.rightCriteria, p.rightCriteria.limit) val rightBoolQuery = Option(ElasticBoolQuery(group = true)) - val rightQuery = ElasticQuery(rightNested) + val rightQuery = ElasticBridge(rightNested) .query(innerHitsNames /*++ rightNested.innerHitsName.toSet*/, rightBoolQuery) p.operator match { diff --git a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala index 6ccc2488..e20a7d54 100644 --- a/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala +++ b/sql/bridge/src/main/scala/app/softnetwork/elastic/sql/bridge/package.scala @@ -813,8 +813,8 @@ package object bridge { implicit def filterToQuery( filter: ElasticFilter - ): ElasticQuery = { - ElasticQuery(filter) + ): ElasticBridge = { + ElasticBridge(filter) } implicit def sqlQueryToAggregations( diff --git a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala index 3462d3a4..d2e785ef 100644 --- a/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala +++ b/sql/bridge/src/test/scala/app/softnetwork/elastic/sql/SQLQuerySpec.scala @@ -6,8 +6,7 @@ import app.softnetwork.elastic.sql.query._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -//import scala.jdk.CollectionConverters._ -import scala.collection.JavaConverters._ +import scala.jdk.CollectionConverters._ /** Created by smanciot on 13/04/17. 
*/ diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala index 3dbd08b5..68611f9f 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/function/aggregate/package.scala @@ -21,7 +21,9 @@ import app.softnetwork.elastic.sql.{asString, Expr, Identifier, TokenRegex, Upda package object aggregate { - sealed trait AggregateFunction extends Function + sealed trait AggregateFunction extends Function { + def multivalued: Boolean = false + } case object COUNT extends Expr("COUNT") with AggregateFunction @@ -105,6 +107,13 @@ package object aggregate { override def withPartitionBy(partitionBy: Seq[Identifier]): TopHitsAggregation = this.copy(partitionBy = partitionBy) override def withFields(fields: Seq[Field]): TopHitsAggregation = this.copy(fields = fields) + override def update(request: SQLSearchRequest): TopHitsAggregation = super + .update(request) + .asInstanceOf[FirstValue] + .copy( + identifier = identifier.update(request), + orderBy = orderBy.update(request) + ) } case class LastValue( @@ -118,6 +127,13 @@ package object aggregate { override def withPartitionBy(partitionBy: Seq[Identifier]): TopHitsAggregation = this.copy(partitionBy = partitionBy) override def withFields(fields: Seq[Field]): TopHitsAggregation = this.copy(fields = fields) + override def update(request: SQLSearchRequest): TopHitsAggregation = super + .update(request) + .asInstanceOf[LastValue] + .copy( + identifier = identifier.update(request), + orderBy = orderBy.update(request) + ) } case class ArrayAgg( @@ -135,8 +151,11 @@ package object aggregate { .update(request) .asInstanceOf[ArrayAgg] .copy( + identifier = identifier.update(request), + orderBy = orderBy.update(request), limit = limit.orElse(request.limit) ) + override def multivalued: Boolean = true } } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala index b02cc9b9..e7ac14f1 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/package.scala @@ -33,6 +33,10 @@ import scala.util.matching.Regex */ package object sql { + /** type alias for SQL query + */ + type SQL = String + import app.softnetwork.elastic.sql.function._ import app.softnetwork.elastic.sql.`type`._ diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala index 68d6ae60..94e111ac 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/OrderBy.scala @@ -17,7 +17,7 @@ package app.softnetwork.elastic.sql.query import app.softnetwork.elastic.sql.function.{Function, FunctionChain} -import app.softnetwork.elastic.sql.{Expr, TokenRegex, Updateable} +import app.softnetwork.elastic.sql.{Expr, Identifier, TokenRegex, Updateable} case object OrderBy extends Expr("ORDER BY") with TokenRegex @@ -36,7 +36,9 @@ case class FieldSort( lazy val direction: SortOrder = order.getOrElse(Asc) lazy val name: String = toSQL(field) override def sql: String = s"$name $direction" - override def update(request: SQLSearchRequest): FieldSort = this // No update logic for now TODO + override def update(request: SQLSearchRequest): FieldSort = this.copy( + field = 
Identifier(field).update(request).name + ) } case class OrderBy(sorts: Seq[FieldSort]) extends Updateable { diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLMultiSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLMultiSearchRequest.scala index f464f914..459472e1 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLMultiSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLMultiSearchRequest.scala @@ -19,7 +19,7 @@ package app.softnetwork.elastic.sql.query import app.softnetwork.elastic.sql.Token case class SQLMultiSearchRequest(requests: Seq[SQLSearchRequest]) extends Token { - override def sql: String = s"${requests.map(_.sql).mkString(" union ")}" + override def sql: String = s"${requests.map(_.sql).mkString(" UNION ALL ")}" def update(): SQLMultiSearchRequest = this.copy(requests = requests.map(_.update())) @@ -29,4 +29,10 @@ case class SQLMultiSearchRequest(requests: Seq[SQLSearchRequest]) extends Token case errors => Left(errors.map { case Left(err) => err }.mkString("\n")) } } + + lazy val sqlAggregations: Map[String, SQLAggregation] = + requests.flatMap(_.sqlAggregations).distinct.toMap + + lazy val fieldAliases: Map[String, String] = + requests.flatMap(_.fieldAliases).distinct.toMap } diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLQuery.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLQuery.scala index 96132b08..29c1bb9c 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLQuery.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLQuery.scala @@ -16,7 +16,15 @@ package app.softnetwork.elastic.sql.query -case class SQLQuery(query: String, score: Option[Double] = None) { +import app.softnetwork.elastic.sql.SQL + +/** SQL Query wrapper + * @param query + * - the SQL query + * @param score + * - optional minimum score for the elasticsearch query + */ +case class SQLQuery(query: SQL, score: Option[Double] = None) { import app.softnetwork.elastic.sql.SQLImplicits._ lazy val request: Option[Either[SQLSearchRequest, SQLMultiSearchRequest]] = { query diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala index 4dda85f8..4d52aca3 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/SQLSearchRequest.scala @@ -128,6 +128,9 @@ case class SQLSearchRequest( lazy val aggregates: Seq[Field] = select.fields.filter(_.aggregation).filterNot(_.topHits.isDefined) ++ topHitsFields + lazy val sqlAggregations: Map[String, SQLAggregation] = + aggregates.flatMap(f => SQLAggregation.fromField(f, this)).map(a => a.aggName -> a).toMap + lazy val excludes: Seq[String] = select.except.map(_.fields.map(_.sourceField)).getOrElse(Nil) lazy val sources: Seq[String] = from.tables.map(_.name) diff --git a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala index 57d9bf9b..15d7df81 100644 --- a/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala +++ b/sql/src/main/scala/app/softnetwork/elastic/sql/query/Select.scala @@ -16,8 +16,8 @@ package app.softnetwork.elastic.sql.query -import app.softnetwork.elastic.sql.function.aggregate.TopHitsAggregation -import app.softnetwork.elastic.sql.function.{Function, FunctionChain} +import 
app.softnetwork.elastic.sql.function.aggregate.{AggregateFunction, TopHitsAggregation}
+import app.softnetwork.elastic.sql.function.{Function, FunctionChain, FunctionUtils}
 import app.softnetwork.elastic.sql.{
   asString,
   Alias,
@@ -68,17 +68,18 @@ case class Field(
     functions.collectFirst { case th: TopHitsAggregation => th }
 
   def update(request: SQLSearchRequest): Field = {
-    val updated =
-      topHits match {
-        case Some(th) =>
-          val topHitsAggregation = th.update(request)
-          identifier.functions match {
-            case _ :: tail => identifier.withFunctions(functions = topHitsAggregation +: tail)
-            case _ => identifier.withFunctions(functions = List(topHitsAggregation))
-          }
-        case None => identifier
-      }
-    this.copy(identifier = updated.update(request))
+    topHits match {
+      case Some(th) =>
+        val topHitsAggregation = th.update(request)
+        val identifier = topHitsAggregation.identifier
+        identifier.functions match {
+          case _ :: tail =>
+            this.copy(identifier = identifier.withFunctions(functions = topHitsAggregation +: tail))
+          case _ =>
+            this.copy(identifier = identifier.withFunctions(functions = List(topHitsAggregation)))
+        }
+      case None => this.copy(identifier = identifier.update(request))
+    }
   }
 
   def painless(context: Option[PainlessContext]): String = identifier.painless(context)
@@ -124,3 +125,99 @@ case class Select(
     }
   }
 }
+
+case class SQLAggregation(
+  aggName: String,
+  field: String,
+  sourceField: String,
+  distinct: Boolean = false,
+  aggType: AggregateFunction,
+  direction: Option[SortOrder] = None,
+  nestedElement: Option[NestedElement] = None,
+  buckets: Seq[String] = Seq.empty
+) {
+  val nested: Boolean = nestedElement.nonEmpty
+  val multivalued: Boolean = aggType.multivalued
+}
+
+object SQLAggregation {
+  def fromField(field: Field, request: SQLSearchRequest): Option[SQLAggregation] = {
+    field.aggregateFunction.map { aggType =>
+      import field._
+      val sourceField = identifier.path
+
+      val direction = request.sorts.get(identifier.identifierName)
+
+      val _field = fieldAlias match {
+        case Some(alias) => alias.alias
+        case _           => sourceField
+      }
+
+      val distinct = identifier.distinct
+
+      val aggName = {
+        if (fieldAlias.isDefined)
+          _field
+        else if (distinct)
+          s"${aggType}_distinct_${sourceField.replace(".", "_")}"
+        else {
+          aggType match {
+            case th: TopHitsAggregation =>
+              s"${th.topHits.sql.toLowerCase}_${sourceField.replace(".", "_")}"
+            case _ =>
+              s"${aggType}_${sourceField.replace(".", "_")}"
+          }
+        }
+      }
+
+      var aggPath = Seq[String]()
+
+      val (aggFuncs, _) = FunctionUtils.aggregateAndTransformFunctions(identifier)
+
+      require(aggFuncs.size == 1, s"Multiple aggregate functions not supported: $aggFuncs")
+
+      val filteredAggName = "filtered_agg"
+
+      def filtered(): Unit =
+        request.having match {
+          case Some(_) =>
+            aggPath ++= Seq(filteredAggName)
+            aggPath ++= Seq(aggName)
+          case _ =>
+            aggPath ++= Seq(aggName)
+        }
+
+      val nestedElement = identifier.nestedElement
+
+      val nestedElements: Seq[NestedElement] =
+        nestedElement.map(n => NestedElements.buildNestedTrees(Seq(n))).getOrElse(Nil)
+
+      nestedElements match {
+        case Nil =>
+        case trees =>
+          def buildNested(n: NestedElement): Unit = {
+            aggPath ++= Seq(n.innerHitsName)
+            val children = n.children
+            if (children.nonEmpty) {
+              children.foreach(buildNested)
+            }
+          }
+          buildNested(trees.head)
+      }
+
+      filtered()
+
+      SQLAggregation(
+        aggPath.mkString("."),
+        _field,
+        sourceField,
+        distinct = distinct,
+        aggType = aggType,
+        direction = direction,
+        nestedElement = field.identifier.nestedElement,
+        buckets =
request.buckets.map { _.name } + ) + } + } +}
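
Taken together, `SQLSearchRequest.sqlAggregations` and the new `SQLAggregation` case class expose what the parser inferred about each aggregate. A sketch of how a caller might inspect this metadata, assuming the parser accepts this query shape (the table and field names are made up):

```scala
import app.softnetwork.elastic.sql.query.SQLQuery

object SqlAggregationsDemo extends App {
  // Hypothetical query: one bucket (country) and one aliased aggregate (cnt).
  val query = SQLQuery("SELECT country, COUNT(userId) AS cnt FROM logs GROUP BY country")

  query.request.foreach {
    case Left(single) =>
      // aggName -> SQLAggregation; with an alias and no HAVING or nesting,
      // the aggregation path should simply be the alias, e.g. "cnt".
      single.sqlAggregations.foreach { case (name, agg) =>
        println(s"$name: type=${agg.aggType}, distinct=${agg.distinct}, buckets=${agg.buckets}")
      }
    case Right(multi) =>
      // UNION ALL requests merge the per-request aggregation maps.
      multi.sqlAggregations.keys.foreach(println)
  }
}
```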