AbsaOSS · cerveada · Aug 18, 2020 · Aug 17, 2020 · Aug 18, 2020
diff --git a/core/src/main/scala/za/co/absa/spline/harvester/plugin/embedded/DatabricksPlugin.scala b/core/src/main/scala/za/co/absa/spline/harvester/plugin/embedded/DatabricksPlugin.scala
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2020 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.spline.harvester.plugin.embedded
+
+import javax.annotation.Priority
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.{SaveMode, SparkSession}
+import za.co.absa.commons.reflect.ReflectionUtils.extractFieldValue
+import za.co.absa.commons.reflect.extractors.SafeTypeMatchingExtractor
+import za.co.absa.spline.harvester.plugin.Plugin.{Precedence, WriteNodeInfo}
+import za.co.absa.spline.harvester.plugin.embedded.DatabricksPlugin.`_: DataBricksCreateDeltaTableCommand`
+import za.co.absa.spline.harvester.plugin.extractor.CatalogTableExtractor
+import za.co.absa.spline.harvester.plugin.{Plugin, ReadNodeProcessing, WriteNodeProcessing}
+import za.co.absa.spline.harvester.qualifier.PathQualifier
+
+import scala.language.reflectiveCalls
+
+/**
+ * Plugin that captures writes performed by the Databricks `CreateDeltaTableCommand`.
+ *
+ * The command class is matched by its fully qualified name and its fields are read through
+ * `extractFieldValue`, so this file does not reference the Databricks class at compile time.
+ *
+ * @param pathQualifier passed to [[CatalogTableExtractor]]
+ * @param session       Spark session whose catalog is passed to [[CatalogTableExtractor]]
+ */
+@Priority(Precedence.Normal)
+class DatabricksPlugin(
+  pathQualifier: PathQualifier,
+  session: SparkSession)
+  extends Plugin
+    with WriteNodeProcessing {
+
+  private val extractor = new CatalogTableExtractor(session.catalog, pathQualifier)
+
+  /**
+   * Maps a matched `CreateDeltaTableCommand` to write node info built from the command's
+   * `table`, `mode` and `query` fields.
+   */
+  override val writeNodeProcessor: PartialFunction[LogicalPlan, WriteNodeInfo] = {
+    case plan@`_: DataBricksCreateDeltaTableCommand`(command) =>
+      val table = extractFieldValue[CatalogTable](command, "table")
+      val saveMode = extractFieldValue[SaveMode](command, "mode")
+      // `query` is optional. When it is empty, fall back to the command plan itself (the same
+      // way SQLPlugin reports a plain CreateTableCommand) instead of failing with an
+      // uninformative NoSuchElementException from `None.get`.
+      val query = extractFieldValue[Option[LogicalPlan]](command, "query").getOrElse(plan)
+      extractor.asTableWrite(table, saveMode, query)
+  }
+}
+
+object DatabricksPlugin {
+
+  // Matches plan nodes of the Databricks `CreateDeltaTableCommand` class, referenced by name.
+  private object `_: DataBricksCreateDeltaTableCommand` extends SafeTypeMatchingExtractor[AnyRef](
+    "com.databricks.sql.transaction.tahoe.commands.CreateDeltaTableCommand")
+}
diff --git a/core/src/main/scala/za/co/absa/spline/harvester/plugin/embedded/SQLPlugin.scala b/core/src/main/scala/za/co/absa/spline/harvester/plugin/embedded/SQLPlugin.scala
@@ -18,17 +18,17 @@ package za.co.absa.spline.harvester.plugin.embedded
 
 import javax.annotation.Priority
 import org.apache.spark.sql.SaveMode.{Append, Overwrite}
-import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, HiveTableRelation}
+import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, HiveTableRelation}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, CreateTableCommand, DropTableCommand}
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InsertIntoDataSourceCommand, InsertIntoHadoopFsRelationCommand, LogicalRelation}
 import org.apache.spark.sql.hive.execution.{CreateHiveTableAsSelectCommand, InsertIntoHiveTable}
 import org.apache.spark.sql.{SaveMode, SparkSession}
 import za.co.absa.commons.reflect.extractors.SafeTypeMatchingExtractor
 import za.co.absa.spline.harvester.builder._
-import za.co.absa.spline.harvester.plugin.Plugin.{Params, Precedence, ReadNodeInfo, WriteNodeInfo}
+import za.co.absa.spline.harvester.plugin.Plugin.{Precedence, ReadNodeInfo, WriteNodeInfo}
 import za.co.absa.spline.harvester.plugin.embedded.SQLPlugin._
+import za.co.absa.spline.harvester.plugin.extractor.CatalogTableExtractor
 import za.co.absa.spline.harvester.plugin.{Plugin, ReadNodeProcessing, WriteNodeProcessing}
 import za.co.absa.spline.harvester.qualifier.PathQualifier
 
@@ -42,14 +42,16 @@ class SQLPlugin(
     with ReadNodeProcessing
     with WriteNodeProcessing {
 
+  private val extractor = new CatalogTableExtractor(session.catalog, pathQualifier)
+
   override val readNodeProcessor: PartialFunction[LogicalPlan, ReadNodeInfo] = {
     case htr: HiveTableRelation =>
-      asTableRead(htr.tableMeta)
+      extractor.asTableRead(htr.tableMeta)
 
     case lr: LogicalRelation if lr.relation.isInstanceOf[HadoopFsRelation] =>
       val hr = lr.relation.asInstanceOf[HadoopFsRelation]
       lr.catalogTable
-        .map(asTableRead)
+        .map(extractor.asTableRead)
         .getOrElse {
           val uris = hr.location.rootPaths.map(path => pathQualifier.qualify(path.toString))
           val fileFormat = hr.fileFormat
@@ -62,7 +64,7 @@ class SQLPlugin(
     case cmd: InsertIntoHadoopFsRelationCommand if cmd.catalogTable.isDefined =>
       val catalogTable = cmd.catalogTable.get
       val mode = if (cmd.mode == SaveMode.Overwrite) Overwrite else Append
-      asTableWrite(catalogTable, mode, cmd.query)
+      extractor.asTableWrite(catalogTable, mode, cmd.query)
 
     case cmd: InsertIntoHadoopFsRelationCommand =>
       val path = cmd.outputPath.toString
@@ -81,77 +83,45 @@ class SQLPlugin(
       (SourceIdentifier(Some(format), qPath), mode, cmd.query, Map.empty)
 
     case `_: InsertIntoDataSourceDirCommand`(cmd) =>
-      asDirWrite(cmd.storage, cmd.provider, cmd.overwrite, cmd.query)
+      extractor.asDirWrite(cmd.storage, cmd.provider, cmd.overwrite, cmd.query)
 
     case `_: InsertIntoHiveDirCommand`(cmd) =>
-      asDirWrite(cmd.storage, "hive", cmd.overwrite, cmd.query)
+      extractor.asDirWrite(cmd.storage, "hive", cmd.overwrite, cmd.query)
 
     case `_: InsertIntoHiveTable`(cmd) =>
       val mode = if (cmd.overwrite) Overwrite else Append
-      asTableWrite(cmd.table, mode, cmd.query)
+      extractor.asTableWrite(cmd.table, mode, cmd.query)
 
     case `_: CreateHiveTableAsSelectCommand`(cmd) =>
-      val sourceId = asTableSourceId(cmd.tableDesc)
+      val sourceId = extractor.asTableSourceId(cmd.tableDesc)
       (sourceId, cmd.mode, cmd.query, Map.empty)
 
     case cmd: CreateDataSourceTableAsSelectCommand =>
-      asTableWrite(cmd.table, cmd.mode, cmd.query)
+      extractor.asTableWrite(cmd.table, cmd.mode, cmd.query)
 
     case dtc: DropTableCommand =>
-      val uri = asTableURI(dtc.tableName)
+      val uri = extractor.asTableURI(dtc.tableName)
       val sourceId = SourceIdentifier(None, pathQualifier.qualify(uri))
       (sourceId, Overwrite, dtc, Map.empty)
 
     case ctc: CreateTableCommand =>
-      val sourceId = asTableSourceId(ctc.table)
+      val sourceId = extractor.asTableSourceId(ctc.table)
       (sourceId, Overwrite, ctc, Map.empty)
   }
-
-  private def asTableURI(tableIdentifier: TableIdentifier): String = {
-    val catalog = session.catalog
-    val TableIdentifier(tableName, maybeTableDatabase) = tableIdentifier
-    val databaseName = maybeTableDatabase getOrElse catalog.currentDatabase
-    val databaseLocation = catalog.getDatabase(databaseName).locationUri.stripSuffix("/")
-    s"$databaseLocation/${tableName.toLowerCase}"
-  }
-
-  private def asTableSourceId(table: CatalogTable): SourceIdentifier = {
-    val uri = table.storage.locationUri
-      .map(_.toString)
-      .getOrElse(asTableURI(table.identifier))
-    SourceIdentifier(table.provider, pathQualifier.qualify(uri))
-  }
-
-  private def asTableRead(ct: CatalogTable) = {
-    val sourceId = asTableSourceId(ct)
-    val params = Map(
-      "table" -> Map(
-        "identifier" -> ct.identifier,
-        "storage" -> ct.storage))
-    (sourceId, params)
-  }
-
-  private def asTableWrite(table: CatalogTable, mode: SaveMode, query: LogicalPlan) = {
-    val sourceIdentifier = asTableSourceId(table)
-    (sourceIdentifier, mode, query, Map("table" -> Map("identifier" -> table.identifier, "storage" -> table.storage)))
-  }
-
-  private def asDirWrite(storage: CatalogStorageFormat, provider: String, overwrite: Boolean, query: LogicalPlan) = {
-    val uri = storage.locationUri.getOrElse(sys.error(s"Cannot determine the data source location: $storage"))
-    val mode = if (overwrite) Overwrite else Append
-    (SourceIdentifier(Some(provider), uri.toString), mode, query, Map.empty: Params)
-  }
 }
 
 object SQLPlugin {
 
   private object `_: InsertIntoHiveTable` extends SafeTypeMatchingExtractor(classOf[InsertIntoHiveTable])
 
-  private object `_: CreateHiveTableAsSelectCommand` extends SafeTypeMatchingExtractor(classOf[CreateHiveTableAsSelectCommand])
+  private object `_: CreateHiveTableAsSelectCommand` extends SafeTypeMatchingExtractor(
+    classOf[CreateHiveTableAsSelectCommand])
 
-  private object `_: InsertIntoHiveDirCommand` extends SafeTypeMatchingExtractor[InsertIntoHiveDirCommand]("org.apache.spark.sql.hive.execution.InsertIntoHiveDirCommand")
+  private object `_: InsertIntoHiveDirCommand` extends SafeTypeMatchingExtractor[InsertIntoHiveDirCommand](
+    "org.apache.spark.sql.hive.execution.InsertIntoHiveDirCommand")
 
-  private object `_: InsertIntoDataSourceDirCommand` extends SafeTypeMatchingExtractor[InsertIntoDataSourceDirCommand]("org.apache.spark.sql.execution.command.InsertIntoDataSourceDirCommand")
+  private object `_: InsertIntoDataSourceDirCommand` extends SafeTypeMatchingExtractor[InsertIntoDataSourceDirCommand](
+    "org.apache.spark.sql.execution.command.InsertIntoDataSourceDirCommand")
 
   private type InsertIntoHiveDirCommand = {
     def storage: CatalogStorageFormat

diff --git a/core/src/main/scala/za/co/absa/spline/harvester/plugin/extractor/CatalogTableExtractor.scala b/core/src/main/scala/za/co/absa/spline/harvester/plugin/extractor/CatalogTableExtractor.scala
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2020 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.spline.harvester.plugin.extractor
+
+import java.util.Locale
+import org.apache.spark.sql.SaveMode
+import org.apache.spark.sql.SaveMode.{Append, Overwrite}
+import org.apache.spark.sql.catalog.Catalog
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import za.co.absa.spline.harvester.builder.SourceIdentifier
+import za.co.absa.spline.harvester.plugin.Plugin.{Params, WriteNodeInfo}
+import za.co.absa.spline.harvester.qualifier.PathQualifier
+
+/**
+ * Helper that turns catalog table / storage metadata into the source identifiers and
+ * read/write node tuples returned by plugins.
+ *
+ * @param catalog       used to resolve the current database and database locations
+ * @param pathQualifier used to qualify table URIs
+ */
+class CatalogTableExtractor(catalog: Catalog, pathQualifier: PathQualifier) {
+
+  /**
+   * Builds the table URI as `<database location>/<lower-cased table name>`.
+   * When the identifier carries no database, the catalog's current database is used.
+   */
+  def asTableURI(tableIdentifier: TableIdentifier): String = {
+    val TableIdentifier(tableName, maybeTableDatabase) = tableIdentifier
+    val databaseName = maybeTableDatabase getOrElse catalog.currentDatabase
+    val databaseLocation = catalog.getDatabase(databaseName).locationUri.stripSuffix("/")
+    // Locale.ROOT keeps the result independent of the JVM default locale
+    // (e.g. under a Turkish locale "I" would otherwise lower-case to a dotless "ı").
+    s"$databaseLocation/${tableName.toLowerCase(Locale.ROOT)}"
+  }
+
+  /**
+   * Builds the source identifier of a table: the table provider plus the qualified storage
+   * location, falling back to [[asTableURI]] when the table has no explicit location.
+   */
+  def asTableSourceId(table: CatalogTable): SourceIdentifier = {
+    val uri = table.storage.locationUri
+      .map(_.toString)
+      .getOrElse(asTableURI(table.identifier))
+    SourceIdentifier(table.provider, pathQualifier.qualify(uri))
+  }
+
+  /** Describes a read from the given table: its source identifier plus table parameters. */
+  def asTableRead(ct: CatalogTable): (SourceIdentifier, Map[String, Any]) =
+    (asTableSourceId(ct), tableParams(ct))
+
+  /** Describes a write of `query` into the given table using the given save mode. */
+  def asTableWrite(table: CatalogTable, mode: SaveMode, query: LogicalPlan): WriteNodeInfo =
+    (asTableSourceId(table), mode, query, tableParams(table))
+
+  /**
+   * Describes a write of `query` into a directory.
+   * The location is taken from `storage` as is (it is not passed through the path qualifier);
+   * fails with an error when `storage` has no location.
+   */
+  def asDirWrite(storage: CatalogStorageFormat, provider: String, overwrite: Boolean, query: LogicalPlan): WriteNodeInfo = {
+    val uri = storage.locationUri.getOrElse(sys.error(s"Cannot determine the data source location: $storage"))
+    val mode = if (overwrite) Overwrite else Append
+    (SourceIdentifier(Some(provider), uri.toString), mode, query, Map.empty: Params)
+  }
+
+  // Parameters attached to both table reads and table writes.
+  private def tableParams(table: CatalogTable): Map[String, Any] =
+    Map(
+      "table" -> Map(
+        "identifier" -> table.identifier,
+        "storage" -> table.storage))
+}