diff --git a/bundle-3.4/pom.xml b/bundle-3.4/pom.xml
new file mode 100644
index 00000000..1ea36ec9
--- /dev/null
+++ b/bundle-3.4/pom.xml
@@ -0,0 +1,1344 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>spark-3.4-spline-agent-bundle_2.12</artifactId>
+    <packaging>jar</packaging>
+
+    <parent>
+        <groupId>za.co.absa.spline.agent.spark</groupId>
+        <artifactId>spline-spark-agent_2.12</artifactId>
+        <relativePath>../pom.xml</relativePath>
+        <version>2.1.0-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>za.co.absa.spline.agent.spark</groupId>
+            <artifactId>agent-core_${scala.binary.version}</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency><groupId>javax.activation</groupId><artifactId>activation</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.airlift</groupId><artifactId>aircompressor</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.typelevel</groupId><artifactId>algebra_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.jetbrains</groupId><artifactId>annotations</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.antlr</groupId><artifactId>antlr-runtime</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.antlr</groupId><artifactId>antlr4-runtime</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.hk2.external</groupId><artifactId>aopalliance-repackaged</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>dev.ludovic.netlib</groupId><artifactId>arpack</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>net.sourceforge.f2j</groupId><artifactId>arpack_combined_all</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.arrow</groupId><artifactId>arrow-format</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.arrow</groupId><artifactId>arrow-memory-core</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.arrow</groupId><artifactId>arrow-memory-netty</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.arrow</groupId><artifactId>arrow-vector</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.yetus</groupId><artifactId>audience-annotations</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.avro</groupId><artifactId>avro</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.avro</groupId><artifactId>avro-ipc</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.avro</groupId><artifactId>avro-mapred</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>dev.ludovic.netlib</groupId><artifactId>blas</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.jolbox</groupId><artifactId>bonecp</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.scalanlp</groupId><artifactId>breeze-macros_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.scalanlp</groupId><artifactId>breeze_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.typelevel</groupId><artifactId>cats-kernel_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.twitter</groupId><artifactId>chill-java</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.twitter</groupId><artifactId>chill_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>commons-cli</groupId><artifactId>commons-cli</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>commons-codec</groupId><artifactId>commons-codec</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>commons-collections</groupId><artifactId>commons-collections</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.commons</groupId><artifactId>commons-collections4</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.codehaus.janino</groupId><artifactId>commons-compiler</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.commons</groupId><artifactId>commons-compress</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.commons</groupId><artifactId>commons-crypto</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>commons-dbcp</groupId><artifactId>commons-dbcp</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>commons-io</groupId><artifactId>commons-io</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>commons-lang</groupId><artifactId>commons-lang</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.commons</groupId><artifactId>commons-lang3</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>commons-logging</groupId><artifactId>commons-logging</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.commons</groupId><artifactId>commons-math3</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>commons-pool</groupId><artifactId>commons-pool</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.commons</groupId><artifactId>commons-text</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.ning</groupId><artifactId>compress-lzf</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.curator</groupId><artifactId>curator-client</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.curator</groupId><artifactId>curator-framework</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.curator</groupId><artifactId>curator-recipes</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.datanucleus</groupId><artifactId>datanucleus-api-jdo</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.datanucleus</groupId><artifactId>datanucleus-core</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.datanucleus</groupId><artifactId>datanucleus-rdbms</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.derby</groupId><artifactId>derby</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.github.joshelser</groupId><artifactId>dropwizard-metrics-hadoop-metrics2-reporter</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.google.flatbuffers</groupId><artifactId>flatbuffers-java</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.google.code.gson</groupId><artifactId>gson</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.google.guava</groupId><artifactId>guava</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client-runtime</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hadoop.thirdparty</groupId><artifactId>hadoop-shaded-guava</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-yarn-server-web-proxy</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.zaxxer</groupId><artifactId>HikariCP</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-beeline</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-cli</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-common</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-exec</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-jdbc</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-llap-common</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-metastore</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-serde</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-service-rpc</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-shims</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-shims</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive.shims</groupId><artifactId>hive-shims-common</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive.shims</groupId><artifactId>hive-shims-scheduler</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.hive</groupId><artifactId>hive-storage-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.hk2</groupId><artifactId>hk2-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.hk2</groupId><artifactId>hk2-locator</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.hk2</groupId><artifactId>hk2-utils</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.httpcomponents</groupId><artifactId>httpclient</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.httpcomponents</groupId><artifactId>httpcore</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.sun.istack</groupId><artifactId>istack-commons-runtime</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.ivy</groupId><artifactId>ivy</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.fasterxml.jackson.core</groupId><artifactId>jackson-annotations</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.fasterxml.jackson.core</groupId><artifactId>jackson-core</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.codehaus.jackson</groupId><artifactId>jackson-core-asl</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.fasterxml.jackson.core</groupId><artifactId>jackson-databind</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.fasterxml.jackson.dataformat</groupId><artifactId>jackson-dataformat-yaml</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.fasterxml.jackson.datatype</groupId><artifactId>jackson-datatype-jsr310</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.codehaus.jackson</groupId><artifactId>jackson-mapper-asl</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.fasterxml.jackson.module</groupId><artifactId>jackson-module-scala_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>jakarta.annotation</groupId><artifactId>jakarta.annotation-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.hk2.external</groupId><artifactId>jakarta.inject</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>jakarta.servlet</groupId><artifactId>jakarta.servlet-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>jakarta.validation</groupId><artifactId>jakarta.validation-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>jakarta.ws.rs</groupId><artifactId>jakarta.ws.rs-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>jakarta.xml.bind</groupId><artifactId>jakarta.xml.bind-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.codehaus.janino</groupId><artifactId>janino</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.javassist</groupId><artifactId>javassist</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.datanucleus</groupId><artifactId>javax.jdo</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>javolution</groupId><artifactId>javolution</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.jaxb</groupId><artifactId>jaxb-runtime</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.slf4j</groupId><artifactId>jcl-over-slf4j</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>javax.jdo</groupId><artifactId>jdo-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.jersey.core</groupId><artifactId>jersey-client</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.jersey.core</groupId><artifactId>jersey-common</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.jersey.containers</groupId><artifactId>jersey-container-servlet</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.jersey.containers</groupId><artifactId>jersey-container-servlet-core</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.jersey.inject</groupId><artifactId>jersey-hk2</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.jersey.core</groupId><artifactId>jersey-server</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>pl.edu.icm</groupId><artifactId>JLargeArrays</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>jline</groupId><artifactId>jline</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>joda-time</groupId><artifactId>joda-time</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.jodd</groupId><artifactId>jodd-core</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>net.sf.jpam</groupId><artifactId>jpam</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.tdunning</groupId><artifactId>json</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.json4s</groupId><artifactId>json4s-ast_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.json4s</groupId><artifactId>json4s-core_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.json4s</groupId><artifactId>json4s-jackson_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.json4s</groupId><artifactId>json4s-scalap_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.google.code.findbugs</groupId><artifactId>jsr305</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>javax.transaction</groupId><artifactId>jta</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.github.wendykierp</groupId><artifactId>JTransforms</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.slf4j</groupId><artifactId>jul-to-slf4j</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.esotericsoftware</groupId><artifactId>kryo-shaded</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-client</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-client-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-httpclient-okhttp</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-admissionregistration</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-apiextensions</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-apps</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-autoscaling</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-batch</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-certificates</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-common</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-coordination</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-core</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-discovery</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-events</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-extensions</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-flowcontrol</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-gatewayapi</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-metrics</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-networking</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-node</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-policy</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-rbac</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-scheduling</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>kubernetes-model-storageclass</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>dev.ludovic.netlib</groupId><artifactId>lapack</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.fusesource.leveldbjni</groupId><artifactId>leveldbjni-all</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.thrift</groupId><artifactId>libfb303</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.thrift</groupId><artifactId>libthrift</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>log4j</groupId><artifactId>log4j</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.logging.log4j</groupId><artifactId>log4j-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.logging.log4j</groupId><artifactId>log4j-core</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.logging.log4j</groupId><artifactId>log4j-slf4j2-impl</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.squareup.okhttp3</groupId><artifactId>logging-interceptor</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.lz4</groupId><artifactId>lz4-java</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.mesos</groupId><artifactId>mesos</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.dropwizard.metrics</groupId><artifactId>metrics-core</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.dropwizard.metrics</groupId><artifactId>metrics-graphite</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.dropwizard.metrics</groupId><artifactId>metrics-jmx</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.dropwizard.metrics</groupId><artifactId>metrics-json</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.dropwizard.metrics</groupId><artifactId>metrics-jvm</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.esotericsoftware</groupId><artifactId>minlog</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-all</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-buffer</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-codec</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-codec-http</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-codec-http2</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-codec-socks</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-common</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-handler</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-handler-proxy</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-resolver</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-transport</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-transport-classes-epoll</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-transport-classes-kqueue</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-transport-native-epoll</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-transport-native-epoll</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-transport-native-kqueue</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-transport-native-kqueue</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.netty</groupId><artifactId>netty-transport-native-unix-common</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.objenesis</groupId><artifactId>objenesis</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.squareup.okhttp3</groupId><artifactId>okhttp</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.squareup.okio</groupId><artifactId>okio</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>net.sf.opencsv</groupId><artifactId>opencsv</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.orc</groupId><artifactId>orc-core</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.orc</groupId><artifactId>orc-mapreduce</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.orc</groupId><artifactId>orc-shims</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>oro</groupId><artifactId>oro</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.glassfish.hk2</groupId><artifactId>osgi-resource-locator</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.thoughtworks.paranamer</groupId><artifactId>paranamer</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.parquet</groupId><artifactId>parquet-column</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.parquet</groupId><artifactId>parquet-common</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.parquet</groupId><artifactId>parquet-encoding</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.parquet</groupId><artifactId>parquet-format-structures</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.parquet</groupId><artifactId>parquet-hadoop</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.parquet</groupId><artifactId>parquet-jackson</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>net.razorvine</groupId><artifactId>pickle</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.google.protobuf</groupId><artifactId>protobuf-java</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>net.sf.py4j</groupId><artifactId>py4j</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.roaringbitmap</groupId><artifactId>RoaringBitmap</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.rocksdb</groupId><artifactId>rocksdbjni</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.scala-lang.modules</groupId><artifactId>scala-collection-compat_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.scala-lang</groupId><artifactId>scala-compiler</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.scala-lang</groupId><artifactId>scala-library</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.scala-lang.modules</groupId><artifactId>scala-parser-combinators_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.scala-lang</groupId><artifactId>scala-reflect</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.scala-lang.modules</groupId><artifactId>scala-xml_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.roaringbitmap</groupId><artifactId>shims</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.slf4j</groupId><artifactId>slf4j-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.yaml</groupId><artifactId>snakeyaml</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.xerial.snappy</groupId><artifactId>snappy-java</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-catalyst_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-core_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-graphx_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-hive-thriftserver_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-hive_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-kubernetes_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-kvstore_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-launcher_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-mesos_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-mllib-local_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-mllib_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-network-common_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-network-shuffle_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-repl_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-sketch_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-sql_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-streaming_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-tags_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-unsafe_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.spark</groupId><artifactId>spark-yarn_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.typelevel</groupId><artifactId>spire-macros_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.typelevel</groupId><artifactId>spire-platform_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.typelevel</groupId><artifactId>spire-util_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.typelevel</groupId><artifactId>spire_2.12</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.antlr</groupId><artifactId>ST4</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>stax</groupId><artifactId>stax-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.clearspring.analytics</groupId><artifactId>stream</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>net.sf.supercsv</groupId><artifactId>super-csv</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.threeten</groupId><artifactId>threeten-extra</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.google.crypto.tink</groupId><artifactId>tink</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>javax.transaction</groupId><artifactId>transaction-api</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.univocity</groupId><artifactId>univocity-parsers</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.xbean</groupId><artifactId>xbean-asm9-shaded</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.tukaani</groupId><artifactId>xz</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>io.fabric8</groupId><artifactId>zjsonpatch</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.zookeeper</groupId><artifactId>zookeeper</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>org.apache.zookeeper</groupId><artifactId>zookeeper-jute</artifactId><scope>provided</scope></dependency>
+        <dependency><groupId>com.github.luben</groupId><artifactId>zstd-jni</artifactId><scope>provided</scope></dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.spurint.maven.plugins</groupId>
+                <artifactId>mima-maven-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>check-abi</id>
+                        <phase>none</phase>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>com.github.cerveada</groupId>
+                <artifactId>scalatest-maven-plugin</artifactId>
+                <configuration>
+                    <skipTests>true</skipTests>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>empty-javadoc-jar</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>jar</goal>
+                        </goals>
+                        <configuration>
+                            <classifier>javadoc</classifier>
+                            <classesDirectory>${basedir}/javadoc</classesDirectory>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-source-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>empty-sources-jar</id>
+                        <goals>
+                            <goal>jar-no-fork</goal>
+                        </goals>
+                        <configuration>
+                            <forceCreation>true</forceCreation>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
diff --git a/core/src/main/scala/za/co/absa/spline/harvester/plugin/embedded/RDDPlugin.scala b/core/src/main/scala/za/co/absa/spline/harvester/plugin/embedded/RDDPlugin.scala
index e6c876fb..dddfbd0b 100644
--- a/core/src/main/scala/za/co/absa/spline/harvester/plugin/embedded/RDDPlugin.scala
+++ b/core/src/main/scala/za/co/absa/spline/harvester/plugin/embedded/RDDPlugin.scala
@@ -19,7 +19,7 @@ package za.co.absa.spline.harvester.plugin.embedded
 import org.apache.spark.Partition
 import org.apache.spark.rdd.{HadoopRDD, RDD}
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.execution.datasources.FileScanRDD
+import org.apache.spark.sql.execution.datasources.{FileScanRDD, PartitionedFile}
 import za.co.absa.spline.commons.reflect.ReflectionUtils
 import za.co.absa.spline.harvester.builder._
 import za.co.absa.spline.harvester.plugin.Plugin.{Precedence, ReadNodeInfo}
@@ -39,7 +39,8 @@ class RDDPlugin(
   override def rddReadNodeProcessor: PartialFunction[RDD[_], ReadNodeInfo] = {
     case fsr: FileScanRDD =>
-      val uris = fsr.filePartitions.flatMap(_.files.map(_.filePath))
+      val files = fsr.filePartitions.flatMap(_.files)
+      val uris = files.map(extractPath)
       ReadNodeInfo(SourceIdentifier(None, uris: _*), Map.empty)
     case hr: HadoopRDD[_, _] =>
       val partitions = ReflectionUtils.extractValue[Array[Partition]](hr, "partitions_")
       val uris = partitions.map(hadoopPartitionToUriString)
@@ -47,6 +48,13 @@ class RDDPlugin(
       ReadNodeInfo(SourceIdentifier(None, uris: _*), Map.empty)
   }
 
+  private def extractPath(file: PartitionedFile): String = {
+    val path = ReflectionUtils.extractValue[AnyRef](file, "filePath")
+    // for Spark 3.3 and lower path is a String
+    // for Spark 3.4 path is org.apache.spark.paths.SparkPath
+    path.toString
+  }
+
   private def hadoopPartitionToUriString(hadoopPartition: Partition): String = {
     val inputSplit = ReflectionUtils.extractValue[AnyRef](hadoopPartition, "inputSplit")
     val fileSplitT = ReflectionUtils.extractValue[AnyRef](inputSplit, "t")
@@ -56,5 +64,4 @@ class RDDPlugin(
     uri.toString
   }
 
-
 }
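A note on the RDDPlugin change above: Spark 3.4 changed the type of PartitionedFile.filePath from String to org.apache.spark.paths.SparkPath, so the field is now read reflectively and rendered with toString, which yields the same URI string on both sides of the version split. A minimal, self-contained sketch of that trick (FieldAccess below is a hypothetical stand-in for ReflectionUtils.extractValue from spline-commons):

    // Illustrative only: read a field whose declared type differs across
    // Spark versions (String on <= 3.3, SparkPath on >= 3.4).
    object FieldAccess {
      def extractValue[T](obj: AnyRef, fieldName: String): T = {
        val field = obj.getClass.getDeclaredField(fieldName)
        field.setAccessible(true)
        field.get(obj).asInstanceOf[T]
      }
    }

    // Whatever the declared type, toString yields the path string:
    //   val uri = FieldAccess.extractValue[AnyRef](partitionedFile, "filePath").toString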
diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml
index 3dcaadfc..8739193f 100644
--- a/integration-tests/pom.xml
+++ b/integration-tests/pom.xml
@@ -175,7 +175,7 @@
         <dependency>
             <groupId>org.elasticsearch</groupId>
             <artifactId>elasticsearch-spark-${elasticsearch.spark.sufix}_${scala.binary.version}</artifactId>
-            <version>8.2.2</version>
+            <version>8.9.1</version>
             <scope>test</scope>
         </dependency>
@@ -267,6 +267,21 @@
         </profile>
+
+        <profile>
+            <id>spark-3.4</id>
+            <properties>
+                <guava.version>16.0.1</guava.version>
+                <elasticsearch.spark.sufix>30</elasticsearch.spark.sufix>
+            </properties>
+            <dependencies>
+                <dependency>
+                    <groupId>org.apache.iceberg</groupId>
+                    <artifactId>iceberg-spark-runtime-3.4_${scala.binary.version}</artifactId>
+                    <version>1.3.1</version>
+                    <scope>test</scope>
+                </dependency>
+            </dependencies>
+        </profile>
diff --git a/integration-tests/src/test/scala/za/co/absa/spline/BasicIntegrationTests.scala b/integration-tests/src/test/scala/za/co/absa/spline/BasicIntegrationTests.scala
index 27004e9f..f623e40a 100644
--- a/integration-tests/src/test/scala/za/co/absa/spline/BasicIntegrationTests.scala
+++ b/integration-tests/src/test/scala/za/co/absa/spline/BasicIntegrationTests.scala
@@ -16,6 +16,7 @@
 package za.co.absa.spline
 
+import org.apache.spark.SPARK_VERSION
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.SaveMode._
@@ -24,9 +25,13 @@ import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
 import org.scalatest.flatspec.AsyncFlatSpec
 import org.scalatest.matchers.should.Matchers
 import za.co.absa.spline.commons.io.TempDirectory
+import za.co.absa.spline.commons.version.Version.VersionStringInterpolator
+import za.co.absa.spline.producer.model.{ExecutionEvent, ExecutionPlan}
 import za.co.absa.spline.test.fixture.SparkFixture
 import za.co.absa.spline.test.fixture.spline.SplineFixture
 
+import scala.concurrent.Future
+
 class BasicIntegrationTests extends AsyncFlatSpec
   with Matchers
   with SparkFixture
@@ -117,15 +122,41 @@ class BasicIntegrationTests extends AsyncFlatSpec
           .write.mode(Append).saveAsTable(tableName)
       }
 
-      (plan2, _) <- captor.lineageOf {
+      // Spark 3.4+ creates 2 commands for each of the writes here, so we need to ignore one.
+      // We only want the one that comes from CreateDataSourceTableAsSelectCommand;
+      // the one we ignore is an extra InsertIntoHadoopFsRelationCommand.
+      // They can arrive out of order, so below we filter out which one is which.
+      (plan2, _) <- if (ver"$SPARK_VERSION" >= ver"3.4.0") {
+        captor.lineageOf {
+          Thread.sleep(5000)
+        }
+      } else Future[(ExecutionPlan, Seq[ExecutionEvent])]((null, null))
+
+      (plan3, _) <- captor.lineageOf {
         spark
           .read.table(tableName)
           .write.mode(Overwrite).saveAsTable("somewhere")
       }
+
+      (plan4, _) <- if (ver"$SPARK_VERSION" >= ver"3.4.0") {
+        captor.lineageOf {
+          Thread.sleep(5000)
+        }
+      } else Future[(ExecutionPlan, Seq[ExecutionEvent])]((null, null))
     } yield {
       println("yield")
-      val writeUri = plan1.operations.write.outputSource
-      val readUri = plan2.operations.reads.head.inputSources.head
+
+      val writePlan = Seq(plan1, plan2)
+        .filter(null.!=)
+        .find(_.operations.write.name == "CreateDataSourceTableAsSelectCommand")
+        .get
+      val readPlan = Seq(plan3, plan4)
+        .filter(null.!=)
+        .find(_.operations.write.name == "CreateDataSourceTableAsSelectCommand")
+        .get
+
+      val writeUri = writePlan.operations.write.outputSource
+      val readUri = readPlan.operations.reads.head.inputSources.head
 
       writeUri shouldEqual readUri
     }
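The BasicIntegrationTests change above encodes a Spark 3.4 behavioral difference: a single saveAsTable can now surface as two write commands (the CreateDataSourceTableAsSelectCommand plus an extra InsertIntoHadoopFsRelationCommand), each captured as its own lineage event, in no guaranteed order. The selection idiom from the yield block can be read in isolation as follows (a sketch; selectPlanBy is a hypothetical helper, not part of the test fixtures):

    import za.co.absa.spline.producer.model.ExecutionPlan

    // Hypothetical helper mirroring the test's filtering logic: keep the captured
    // plan written by the command under test, ignoring the `null` placeholder
    // that stands in for "nothing captured" on Spark versions below 3.4.
    def selectPlanBy(commandName: String)(plans: ExecutionPlan*): ExecutionPlan =
      plans
        .filter(_ != null)
        .find(_.operations.write.name == commandName)
        .getOrElse(sys.error(s"no captured plan was produced by $commandName"))

    // Usage, matching the test above:
    //   val writePlan = selectPlanBy("CreateDataSourceTableAsSelectCommand")(plan1, plan2)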
diff --git a/integration-tests/src/test/scala/za/co/absa/spline/DeltaSpec.scala b/integration-tests/src/test/scala/za/co/absa/spline/DeltaSpec.scala
index 4e1c2b72..d8780c03 100644
--- a/integration-tests/src/test/scala/za/co/absa/spline/DeltaSpec.scala
+++ b/integration-tests/src/test/scala/za/co/absa/spline/DeltaSpec.scala
@@ -37,7 +37,7 @@ class DeltaSpec extends AsyncFlatSpec
   private val deltaPath = TempDirectory(prefix = "delta", pathOnly = true).deleteOnExit().toURI.toString
 
   it should "support Delta Lake as a source" taggedAs
-    ignoreIf(ver"$SPARK_VERSION" < ver"2.4.2") in
+    ignoreIf(ver"$SPARK_VERSION" < ver"2.4.2" || ver"$SPARK_VERSION" >= ver"3.4.0") in
     withNewSparkSession { implicit spark =>
       withLineageTracking { captor =>
         val testData: DataFrame = {
@@ -79,7 +79,7 @@ class DeltaSpec extends AsyncFlatSpec
   }
 
   it should "support insert into existing Delta Lake table" taggedAs
-    ignoreIf(ver"$SPARK_VERSION" < ver"2.4.2") in
+    ignoreIf(ver"$SPARK_VERSION" < ver"2.4.2" || ver"$SPARK_VERSION" >= ver"3.4.0") in
     withNewSparkSession { implicit spark =>
       withLineageTracking { lineageCaptor =>
         val testData: DataFrame = {
diff --git a/integration-tests/src/test/scala/za/co/absa/spline/KafkaSinkSpec.scala b/integration-tests/src/test/scala/za/co/absa/spline/KafkaSinkSpec.scala
index a8e8eccc..0da51990 100644
--- a/integration-tests/src/test/scala/za/co/absa/spline/KafkaSinkSpec.scala
+++ b/integration-tests/src/test/scala/za/co/absa/spline/KafkaSinkSpec.scala
@@ -48,8 +48,9 @@ class KafkaSinkSpec
     EmbeddedKafka.stop()
   }
 
-  it should "support Kafka as a write source" in {
+  it should "support Kafka as a write source while reading from multiple Kafka read sources" in {
     val topicName = "bananas"
+    val otherTopicName = "anotherTopic"
 
     withNewSparkSession { implicit spark =>
       withLineageTracking { captor =>
@@ -74,6 +75,16 @@
             .option("topic", topicName)
             .save())
 
+          // Write to the other topic, seeding lineage for the multi-topic read below
+          (_, _) <- captor.lineageOf(
+            testData
+              .selectExpr("CAST (name AS STRING) AS value")
+              .write
+              .format("kafka")
+              .option("kafka.bootstrap.servers", kafkaUrl)
+              .option("topic", otherTopicName)
+              .save())
+
           (plan2, _) <- captor.lineageOf(
             reader
               .option("subscribe", s"$topicName,anotherTopic")
@@ -98,6 +109,7 @@
 
           plan2.operations.reads.head.extra("sourceType") shouldBe Some("kafka")
           plan2.operations.reads.head.inputSources should contain(s"kafka:$topicName")
+          plan2.operations.reads.head.inputSources should contain(s"kafka:$otherTopicName")
           plan2.operations.reads.head.params should contain key "subscribe"
 
           plan3.operations.reads.head.extra("sourceType") shouldBe Some("kafka")
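The DeltaSpec hunks above gate the Delta tests off for Spark 3.4+ with the same mechanism that already excluded pre-2.4.2 Sparks: ignoreIf over semantic version comparisons built with the ver string interpolator from spline-commons (visible in the imports of the specs in this diff). A small sketch of the predicate, under those imports:

    import org.apache.spark.SPARK_VERSION
    import za.co.absa.spline.commons.version.Version.VersionStringInterpolator

    // True when the running Spark falls outside the range these Delta tests support:
    // older than 2.4.2 (no usable Delta) or 3.4.0+ (not yet covered by the tests).
    val deltaTestIgnored: Boolean =
      ver"$SPARK_VERSION" < ver"2.4.2" || ver"$SPARK_VERSION" >= ver"3.4.0"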
contain(s"kafka:$otherTopicName") plan2.operations.reads.head.params should contain key "subscribe" plan3.operations.reads.head.extra("sourceType") shouldBe Some("kafka") diff --git a/integration-tests/src/test/scala/za/co/absa/spline/harvester/LineageHarvesterSpec.scala b/integration-tests/src/test/scala/za/co/absa/spline/harvester/LineageHarvesterSpec.scala index f88a7bca..c63c23b2 100644 --- a/integration-tests/src/test/scala/za/co/absa/spline/harvester/LineageHarvesterSpec.scala +++ b/integration-tests/src/test/scala/za/co/absa/spline/harvester/LineageHarvesterSpec.scala @@ -36,6 +36,7 @@ import za.co.absa.spline.test.fixture.spline.SplineFixture import za.co.absa.spline.test.fixture.{SparkDatabaseFixture, SparkFixture} import java.util.UUID +import scala.concurrent.Future import scala.language.reflectiveCalls import scala.util.Try @@ -376,11 +377,25 @@ class LineageHarvesterSpec extends AsyncFlatSpec val df = spark.createDataset(Seq(TestRow(1, 2.3, "text"))) for { - (plan, _) <- captor.lineageOf { + (plan1, _) <- captor.lineageOf { df.createOrReplaceTempView("tempView") spark.sql("CREATE TABLE users_sales AS SELECT i, d, s FROM tempView ") } + // Spark 3.4+ is creating 2 commands for this CTAS here so we need to ignore one + // We only want the one that is from CreateHiveTableAsSelectCommand + // The one we ignore here is an extra InsertIntoHiveTableCommand + // They can come out of order so we need to filter out which one is which. + (plan2, _) <- if (ver"$SPARK_VERSION" >= ver"3.4.0") { + captor.lineageOf { + Thread.sleep(5000) + } + } else Future[(ExecutionPlan, Seq[ExecutionEvent])](null, null) } yield { + val plan = Seq(plan1, plan2) + .filter(null.!=) + .find(_.operations.write.name == "CreateHiveTableAsSelectCommand") + .get + val writeOperation = plan.operations.write writeOperation.id shouldEqual "op-0" writeOperation.append shouldEqual false @@ -500,7 +515,7 @@ class LineageHarvesterSpec extends AsyncFlatSpec plan should not be null event.durationNs should be(empty) event.error should not be empty - event.error.get.toString should include(s"path ${tmpLocal.toURI.toString.stripSuffix("/")} already exists") + event.error.get.toString.toLowerCase should include(s"path ${tmpLocal.toURI.toString.stripSuffix("/")} already exists") } } } diff --git a/pom.xml b/pom.xml index 7d94a00b..897ffc97 100644 --- a/pom.xml +++ b/pom.xml @@ -91,6 +91,7 @@ 3.1.3 3.2.3 3.3.1 + 3.4.1 @@ -100,6 +101,8 @@ 1.0.0 2.0.0 2.1.0 + 2.4.0 + ${cassandra-connector-24.version} @@ -108,6 +111,7 @@ 3.1.0 3.2.0 3.3.0 + 3.4.1 0.13.7 @@ -210,6 +214,7 @@ com.github.cerveada scalatest-maven-plugin + I ${project.build.directory}/surefire-reports . WDF TestSuite.txt @@ -815,6 +820,29 @@ + + spark-3.4 + + ${spark-34.version} + ${delta-24.version} + ${spark-34.version}_0.19.0 + ${cassandra-connector-34.version} + + + + + org.apache.spark + spark-parent_${scala.binary.version} + ${spark.version} + pom + import + + + + + + +