[SPARK-33588][SQL] Respect the spark.sql.caseSensitive config while…

… resolving partition spec in v1 `SHOW TABLE EXTENDED` Perform partition spec normalization in `ShowTablesCommand` according to the table schema before getting partitions from the catalog. The normalization via `PartitioningUtils.normalizePartitionSpec()` adjusts the column names in partition specification, w.r.t. the real partition column names and case sensitivity. Even when `spark.sql.caseSensitive` is `false` which is the default value, v1 `SHOW TABLE EXTENDED` is case sensitive: ```sql spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int) > USING parquet > partitioned by (year, month); spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1; spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1); Error in query: Partition spec is invalid. The spec (YEAR, Month) must match the partition spec (year, month) defined in table '`default`.`tbl1`'; ``` Yes. After the changes, the `SHOW TABLE EXTENDED` command respects the SQL config. And for example above, it returns correct result: ```sql spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1); default tbl1 false Partition Values: [year=2015, month=1] Location: file:/Users/maximgekk/spark-warehouse/tbl1/year=2015/month=1 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1, path=file:/Users/maximgekk/spark-warehouse/tbl1] Partition Parameters: {transient_lastDdlTime=1606595118, totalSize=623, numFiles=1} Created Time: Sat Nov 28 23:25:18 MSK 2020 Last Access: UNKNOWN Partition Statistics: 623 bytes ``` By running the modified test suite `v1/ShowTablesSuite` Closes apache#30529 from MaxGekk/show-table-case-sensitive-spec. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org> (cherry picked from commit 0054fc9) Signed-off-by: Max Gekk <max.gekk@gmail.com>
MaxGekk · Nov 30, 2020 · f09fcec · f09fcec
1 parent f6638cf
commit f09fcec
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 7 deletions.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -884,12 +884,17 @@ case class ShowTablesCommand(
       //
       // Note: tableIdentifierPattern should be non-empty, otherwise a [[ParseException]]
       // should have been thrown by the sql parser.
-      val tableIdent = TableIdentifier(tableIdentifierPattern.get, Some(db))
-      val table = catalog.getTableMetadata(tableIdent).identifier
-      val partition = catalog.getPartition(tableIdent, partitionSpec.get)
-      val database = table.database.getOrElse("")
-      val tableName = table.table
-      val isTemp = catalog.isTemporaryTable(table)
+      val table = catalog.getTableMetadata(TableIdentifier(tableIdentifierPattern.get, Some(db)))
+      val tableIdent = table.identifier
+      val normalizedSpec = PartitioningUtils.normalizePartitionSpec(
+        partitionSpec.get,
+        table.partitionColumnNames,
+        tableIdent.quotedString,
+        sparkSession.sessionState.conf.resolver)
+      val partition = catalog.getPartition(tableIdent, normalizedSpec)
+      val database = tableIdent.database.getOrElse("")
+      val tableName = tableIdent.table
+      val isTemp = catalog.isTemporaryTable(tableIdent)
       val information = partition.simpleString
       Seq(Row(database, tableName, isTemp, s"$information\n"))
     }

diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out
@@ -224,7 +224,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Partition spec is invalid. The spec (a, d) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`';
+a is not a valid partition column in table `showdb`.`show_t1`.;
 
 
 -- !query

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
+import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
@@ -3030,6 +3031,27 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
       }
     }
   }
+
+  test("SPARK-33588: case sensitivity of partition spec") {
+    val t = "part_table"
+    withTable(t) {
+      sql(s"""
+        |CREATE TABLE $t (price int, qty int, year int, month int)
+        |USING $dataSource
+        |PARTITIONED BY (year, month)""".stripMargin)
+      sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1")
+      Seq(
+        true -> "PARTITION(year = 2015, month = 1)",
+        false -> "PARTITION(YEAR = 2015, Month = 1)"
+      ).foreach { case (caseSensitive, partitionSpec) =>
+        withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+          val df = sql(s"SHOW TABLE EXTENDED LIKE '$t' $partitionSpec")
+          val information = df.select("information").first().getString(0)
+          assert(information.contains("Partition Values: [year=2015, month=1]"))
+        }
+      }
+    }
+  }
 }
 
 object FakeLocalFsFileSystem {