From 66640df34226b3370226c73d84dbf4b3ddcf49f5 Mon Sep 17 00:00:00 2001 From: Hongdan Zhu Date: Mon, 28 Apr 2025 16:17:21 -0700 Subject: [PATCH] HIVE-28655: Implement HMS Related Drop Stats Changes, Reset COLUMN_STAT_ACCURATE After Dropping --- .../TestCachedStoreUpdateUsingEvents.java | 11 +- .../clientpositive/llap/acid_stats4.q.out | 165 +++++++++++++++++- .../llap/alter_table_column_stats.q.out | 24 +-- ...ncompatible_vectorization_false_date.q.out | 4 + .../clientpositive/llap/orc_drop_column.q.out | 2 - .../llap/parquet_drop_column.q.out | 2 - .../rename_external_partition_location.q.out | 2 - .../llap/rename_partition_location.q.out | 2 + .../hadoop/hive/common/StatsSetupConst.java | 7 +- .../hadoop/hive/metastore/HMSHandler.java | 9 + .../hadoop/hive/metastore/ObjectStore.java | 2 + .../hive/metastore/TestHiveMetaStore.java | 16 ++ 12 files changed, 211 insertions(+), 35 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStoreUpdateUsingEvents.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStoreUpdateUsingEvents.java index 2db61aa6c06a..ab3c3f07999d 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStoreUpdateUsingEvents.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/cache/TestCachedStoreUpdateUsingEvents.java @@ -829,7 +829,16 @@ private String getValidWriteIds(String dbName, String tblName) throws Throwable private void validateTablePara(String dbName, String tblName) throws Throwable { Table tblRead = rawStore.getTable(DEFAULT_CATALOG_NAME, dbName, tblName); Table tblRead1 = sharedCache.getTableFromCache(DEFAULT_CATALOG_NAME, dbName, tblName); - Assert.assertEquals(tblRead.getParameters(), tblRead1.getParameters()); + // Prepare both the expected and actual table parameters + Map expected = new HashMap<>(tblRead.getParameters()); + Map actual = new HashMap<>(tblRead1.getParameters()); + + // 
Remove the COLUMN_STATS_ACCURATE entry from both maps, because it is now completely removed + expected.remove("COLUMN_STATS_ACCURATE"); + actual.remove("COLUMN_STATS_ACCURATE"); + + // Now assert equality without the COLUMN_STATS_ACCURATE key + Assert.assertEquals(expected, actual); } private void validatePartPara(String dbName, String tblName, String partName) throws Throwable { diff --git a/ql/src/test/results/clientpositive/llap/acid_stats4.q.out b/ql/src/test/results/clientpositive/llap/acid_stats4.q.out index d5d9e2d32c4c..3cde15edd7f8 100644 --- a/ql/src/test/results/clientpositive/llap/acid_stats4.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_stats4.q.out @@ -567,36 +567,137 @@ POSTHOOK: Output: default@stats_part@p=104 PREHOOK: query: explain select count(key) from stats_part where p = 101 PREHOOK: type: QUERY PREHOOK: Input: default@stats_part +PREHOOK: Input: default@stats_part@p=101 #### A masked pattern was here #### POSTHOOK: query: explain select count(key) from stats_part where p = 101 POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_part +POSTHOOK: Input: default@stats_part@p=101 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_part + filterExpr: (p = 101) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(key) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator - limit: 1 + limit: -1 Processor Tree: ListSink PREHOOK: query: explain select count(key) from stats_part PREHOOK: type: QUERY PREHOOK: Input: default@stats_part +PREHOOK: Input: default@stats_part@p=101 +PREHOOK: Input: default@stats_part@p=103 +PREHOOK: Input: default@stats_part@p=104 #### A masked pattern was here #### POSTHOOK: query: explain select count(key) from stats_part POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_part +POSTHOOK: Input: default@stats_part@p=101 +POSTHOOK: Input: default@stats_part@p=103 +POSTHOOK: Input: default@stats_part@p=104 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num 
rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(key) + minReductionHashAggr: 0.6666666 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator - limit: 1 + limit: -1 Processor Tree: ListSink @@ -721,18 +822,70 @@ STAGE PLANS: PREHOOK: query: explain select count(value) from stats_part PREHOOK: type: QUERY PREHOOK: Input: default@stats_part +PREHOOK: Input: default@stats_part@p=101 +PREHOOK: Input: default@stats_part@p=103 +PREHOOK: Input: default@stats_part@p=104 #### A masked pattern was here #### POSTHOOK: query: explain select count(value) from stats_part POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_part +POSTHOOK: Input: default@stats_part@p=101 +POSTHOOK: Input: default@stats_part@p=103 +POSTHOOK: Input: default@stats_part@p=104 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 
2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_part + Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(value) + minReductionHashAggr: 0.6666666 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: may be used (ACID table) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator - limit: 1 + limit: -1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/llap/alter_table_column_stats.q.out b/ql/src/test/results/clientpositive/llap/alter_table_column_stats.q.out index 7384695e0a2e..441c6f9d1919 100644 --- a/ql/src/test/results/clientpositive/llap/alter_table_column_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/alter_table_column_stats.q.out @@ -1105,7 +1105,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - 
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 #### A masked pattern was here #### numFiles 2 @@ -1146,7 +1145,6 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} numFiles 1 numRows 10 rawDataSize 154 @@ -1238,7 +1236,6 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} numFiles 1 numRows 20 rawDataSize 312 @@ -1343,7 +1340,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 #### A masked pattern was here #### numFiles 2 @@ -1384,7 +1380,6 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} numFiles 1 numRows 10 rawDataSize 154 @@ -1476,7 +1471,7 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} + COLUMN_STATS_ACCURATE {} numFiles 1 numRows 20 rawDataSize 312 @@ -1581,7 +1576,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 #### A masked pattern was here #### numFiles 2 @@ -1622,7 +1616,6 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} numFiles 1 numRows 10 rawDataSize 154 @@ -1714,7 +1707,7 @@ Database: statsdb1 Table: testpart1 #### A masked 
pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} + COLUMN_STATS_ACCURATE {} numFiles 1 numRows 20 rawDataSize 312 @@ -1819,7 +1812,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 #### A masked pattern was here #### numFiles 2 @@ -3102,7 +3094,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 #### A masked pattern was here #### numFiles 2 @@ -3143,7 +3134,6 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} numFiles 1 numRows 10 rawDataSize 154 @@ -3235,7 +3225,6 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}} numFiles 1 numRows 20 rawDataSize 312 @@ -3340,7 +3329,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 #### A masked pattern was here #### numFiles 2 @@ -3381,7 +3369,6 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} numFiles 1 numRows 10 rawDataSize 154 @@ -3473,7 +3460,7 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\"}} + COLUMN_STATS_ACCURATE {} numFiles 1 numRows 20 rawDataSize 312 
@@ -3578,7 +3565,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 #### A masked pattern was here #### numFiles 2 @@ -3619,7 +3605,6 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} numFiles 1 numRows 10 rawDataSize 154 @@ -3711,7 +3696,7 @@ Database: statsdb1 Table: testpart1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col2\":\"true\"}} + COLUMN_STATS_ACCURATE {} numFiles 1 numRows 20 rawDataSize 312 @@ -3816,7 +3801,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} bucketing_version 2 #### A masked pattern was here #### numFiles 2 diff --git a/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out b/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out index 26b3d3487c5c..5f65a0df6683 100644 --- a/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out +++ b/ql/src/test/results/clientpositive/llap/change_allowincompatible_vectorization_false_date.q.out @@ -58,10 +58,14 @@ POSTHOOK: Output: default@change_allowincompatible_vectorization_false_date PREHOOK: query: select count(*) from change_allowincompatible_vectorization_false_date PREHOOK: type: QUERY PREHOOK: Input: default@change_allowincompatible_vectorization_false_date +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +PREHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=bbb #### A masked pattern was here #### POSTHOOK: query: select count(*) from change_allowincompatible_vectorization_false_date 
POSTHOOK: type: QUERY POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=aaa +POSTHOOK: Input: default@change_allowincompatible_vectorization_false_date@s=bbb #### A masked pattern was here #### 50 PREHOOK: query: insert into table change_allowincompatible_vectorization_false_date partition (s='aaa') values ('2038-03-22 07:26:48.0') diff --git a/ql/src/test/results/clientpositive/llap/orc_drop_column.q.out b/ql/src/test/results/clientpositive/llap/orc_drop_column.q.out index 2cca11a82814..a52c5b9cbe7b 100644 --- a/ql/src/test/results/clientpositive/llap/orc_drop_column.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_drop_column.q.out @@ -232,7 +232,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: EXTERNAL_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 #### A masked pattern was here #### @@ -274,7 +273,6 @@ Database: default Table: tbl_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} numFiles 1 numRows 3 rawDataSize 282 diff --git a/ql/src/test/results/clientpositive/llap/parquet_drop_column.q.out b/ql/src/test/results/clientpositive/llap/parquet_drop_column.q.out index 9532377036a5..354068ea3895 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_drop_column.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_drop_column.q.out @@ -232,7 +232,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: EXTERNAL_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 #### A masked pattern was here #### @@ -274,7 +273,6 @@ Database: default Table: tbl_parq #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} numFiles 1 numRows 3 rawDataSize 128 diff --git a/ql/src/test/results/clientpositive/llap/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/llap/rename_external_partition_location.q.out index 5d144c591cbe..6189730f2a51 100644 --- a/ql/src/test/results/clientpositive/llap/rename_external_partition_location.q.out +++ b/ql/src/test/results/clientpositive/llap/rename_external_partition_location.q.out @@ -263,7 +263,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: EXTERNAL_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 numFiles 1 @@ -303,7 +302,6 @@ Database: default Table: ex_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 70 diff --git a/ql/src/test/results/clientpositive/llap/rename_partition_location.q.out b/ql/src/test/results/clientpositive/llap/rename_partition_location.q.out index f0536bde6dea..6717fdb35160 100644 --- a/ql/src/test/results/clientpositive/llap/rename_partition_location.q.out +++ b/ql/src/test/results/clientpositive/llap/rename_partition_location.q.out @@ -44,6 +44,7 @@ POSTHOOK: Output: default@rename_partition_table_n0@part=2 PREHOOK: query: SELECT count(*) FROM rename_partition_table_n0 where part = '2' PREHOOK: type: QUERY PREHOOK: Input: default@rename_partition_table_n0 +PREHOOK: Input: default@rename_partition_table_n0@part=2 #### A masked pattern was here #### 500 PREHOOK: query: CREATE TABLE rename_partition_table_2 (key STRING, value STRING) PARTITIONED BY (part STRING) @@ -63,6 +64,7 @@ PREHOOK: Output: default@rename_partition_table_2@part=1 PREHOOK: query: SELECT count(*) FROM rename_partition_table_2 where part = '2' PREHOOK: type: QUERY PREHOOK: Input: default@rename_partition_table_2 +PREHOOK: 
Input: default@rename_partition_table_2@part=2 #### A masked pattern was here #### 500 PREHOOK: query: DROP TABLE rename_partition_table_n0 diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java index f2c6ef27f571..ed5d0a390c82 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -374,8 +374,11 @@ public static void removeColumnStatsState(Map params, List - stats.columnStats.remove(colName.toLowerCase())); + colNames.forEach(colName -> { + if (colName != null) { + stats.columnStats.remove(colName.toLowerCase()); + } + }); try { params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats)); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java index e117eb3446b1..fe58039f173d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java @@ -7502,6 +7502,15 @@ public boolean delete_column_statistics_req(DeleteColumnStatisticsRequest req) t } } } + // on the table level, partially delete(update) table level parameter COLUMN_STATS_ACCURATE + if (colNames == null || colNames.isEmpty()){ + // remove all column names in parameter COLUMN_STATS_ACCURATE + StatsSetupConst.clearColumnStatsState(table.getParameters()); + } else { + // remove the deleted column names in parameter COLUMN_STATS_ACCURATE + StatsSetupConst.removeColumnStatsState(table.getParameters(), colNames); + } + 
rawStore.alterTable(parsedDbName[CAT_NAME], parsedDbName[DB_NAME], tableName, table, null); committed = rawStore.commitTransaction(); } finally { if (!committed) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 5e5b91d642a5..0b6b38f3ab24 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -10260,6 +10260,8 @@ public boolean deletePartitionColumnStatistics(String catName, String dbName, St } dbName = org.apache.commons.lang3.StringUtils.defaultString(dbName, Warehouse.DEFAULT_DATABASE_NAME); catName = normalizeIdentifier(catName); + // use directSql to completely delete the parameter COLUMN_STATS_ACCURATE on partition level + directSql.deleteColumnStatsState(getTable(catName, dbName, tableName).getId()); return new GetHelper(catName, dbName, tableName, true, true) { @Override protected String describeResult() { diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index ef9c6b380a8a..12b729112b77 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -1846,6 +1846,11 @@ public void testColumnStatistics() throws Throwable { List stats = client.getTableColumnStatistics( dbName, tblName, Lists.newArrayList(colName[1]), ENGINE); assertTrue("stats are not empty: " + stats, stats.isEmpty()); + // test if all columns are deleted from parameter COLUMN_STATS_ACCURATE + Map
tableParams = client.getTable(dbName, tblName).getParameters(); + String table_column_stats_accurate = tableParams.get("COLUMN_STATS_ACCURATE"); + assertTrue("parameter COLUMN_STATS_ACCURATE is not accurate in " + tblName, table_column_stats_accurate == null || + (!table_column_stats_accurate.contains(colName[0]) && !table_column_stats_accurate.contains(colName[1]))); colStats.setStatsDesc(statsDesc); colStats.setStatsObj(statsObjs); @@ -1863,6 +1868,11 @@ public void testColumnStatistics() throws Throwable { // multiple columns request.setCol_names(Arrays.asList(colName)); assertTrue(client.deleteColumnStatistics(request)); + // test if the columns in colName array are deleted from parameter COLUMN_STATS_ACCURATE + tableParams = client.getTable(dbName, tblName).getParameters(); + table_column_stats_accurate = tableParams.get("COLUMN_STATS_ACCURATE"); + assertTrue("parameter COLUMN_STATS_ACCURATE is not accurate in " + tblName, table_column_stats_accurate == null || + (!table_column_stats_accurate.contains(colName[0]) && !table_column_stats_accurate.contains(colName[1]))); colStats3 = client.getTableColumnStatistics( dbName, tblName, Lists.newArrayList(colName), ENGINE); assertTrue("stats are not empty: " + colStats3, colStats3.isEmpty()); @@ -1958,6 +1968,12 @@ public void testColumnStatistics() throws Throwable { Lists.newArrayList(partitions.get(0), partitions.get(1), partitions.get(2)), Lists.newArrayList(colName), ENGINE); assertEquals(1, stats2.size()); assertEquals(2, stats2.get(partitions.get(2)).size()); + // test if all columns are deleted from parameter COLUMN_STATS_ACCURATE + Partition partition_0 = client.getPartition(dbName, tblName, partitions.get(0)); + Map partitionParams = partition_0.getParameters(); + String partition_column_stats_accurate = partitionParams.get("COLUMN_STATS_ACCURATE"); + assertTrue("parameter COLUMN_STATS_ACCURATE is not accurate in " + partitions.get(0),partition_column_stats_accurate == null || + 
(!partition_column_stats_accurate.contains(colName[0]) && !partition_column_stats_accurate.contains(colName[1]))); // no partition or column name is set request.unsetPart_names();