diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java index f799746e7d0a..9fa7bcaa90d5 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -90,7 +90,7 @@ public final class Catalogs { public static final String MATERIALIZED_VIEW_VERSION_PROPERTY_KEY = "iceberg.materialized.view.version"; public static final String MATERIALIZED_VIEW_ORIGINAL_TEXT = "iceberg.materialized.view.original.text"; - private static final String MATERIALIZED_VIEW_STORAGE_TABLE_IDENTIFIER_SUFFIX = "_storage_table"; + public static final String MATERIALIZED_VIEW_STORAGE_TABLE_IDENTIFIER_SUFFIX = "_storage_table"; private Catalogs() { } @@ -301,6 +301,7 @@ public static MaterializedView createMaterializedView( Schema schema = schema(props); PartitionSpec spec = spec(props, schema); String location = props.getProperty(LOCATION); + String storageTableLocation = location + MATERIALIZED_VIEW_STORAGE_TABLE_IDENTIFIER_SUFFIX; String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME); Optional catalog = loadCatalog(conf, catalogName); @@ -316,7 +317,8 @@ public static MaterializedView createMaterializedView( Map map = filterIcebergTableProperties(props); String storageTableIdentifier = name + MATERIALIZED_VIEW_STORAGE_TABLE_IDENTIFIER_SUFFIX; Table storageTable = catalog.get().buildTable(TableIdentifier.parse(storageTableIdentifier), schema) - .withPartitionSpec(spec).withLocation(location).withProperties(map).withSortOrder(sortOrder).create(); + .withPartitionSpec(spec).withLocation(storageTableLocation).withProperties(map).withSortOrder(sortOrder) + .create(); Map viewProperties = Maps.newHashMapWithExpectedSize(2); viewProperties.put(MATERIALIZED_VIEW_PROPERTY_KEY, "true"); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java index a7ba33624b30..a2e3da00574c 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java @@ -49,6 +49,7 @@ private InputFormatConfig() { public static final String TABLE_IDENTIFIER = "iceberg.mr.table.identifier"; public static final String TABLE_LOCATION = "iceberg.mr.table.location"; public static final String TABLE_SCHEMA = "iceberg.mr.table.schema"; + public static final String TABLE_TYPE = "iceberg.mr.table.type"; public static final String PARTITION_SPEC = "iceberg.mr.table.partition.spec"; public static final String SERIALIZED_TABLE_PREFIX = "iceberg.mr.serialized.table."; public static final String TABLE_CATALOG_PREFIX = "iceberg.mr.table.catalog."; diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 7554de2c588a..4ceaa045ac46 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -177,6 +177,7 @@ import org.apache.iceberg.expressions.ResidualEvaluator; import org.apache.iceberg.expressions.StrictMetricsEvaluator; import org.apache.iceberg.hadoop.ConfigProperties; +import org.apache.iceberg.hive.HiveOperationsBase; import org.apache.iceberg.hive.HiveSchemaUtil; import org.apache.iceberg.hive.HiveTableOperations; import org.apache.iceberg.hive.IcebergCatalogProperties; @@ -1606,6 +1607,7 @@ static void overlayTableProperties(Configuration configuration, TableDesc tableD .forEach(entry -> map.put(entry.getKey(), entry.getValue())); String location; + String objectType = "ICEBERG"; Schema schema; PartitionSpec spec; String bytes; @@ -1630,6 +1632,12 @@ static void overlayTableProperties(Configuration configuration, TableDesc tableD } location = map.get(hive_metastoreConstants.META_TABLE_LOCATION); + objectType = map.get(hive_metastoreConstants.META_OBJECT_TYPE); + + if (TableType.MATERIALIZED_VIEW.name().equals(map.get(hive_metastoreConstants.META_OBJECT_TYPE))) { + objectType = HiveOperationsBase.ICEBERG_VIEW_TYPE_VALUE; + } + bytes = SerializationUtil.serializeToBase64(null); try { @@ -1647,6 +1655,10 @@ static void overlayTableProperties(Configuration configuration, TableDesc tableD if (StringUtils.isNotBlank(location)) { map.put(InputFormatConfig.TABLE_LOCATION, location); } + if (StringUtils.isNotBlank(objectType)) { + map.put(InputFormatConfig.TABLE_TYPE, objectType); + } + String schemaJson = SchemaParser.toJson(schema); map.put(InputFormatConfig.TABLE_SCHEMA, schemaJson); // save schema into table props as well to avoid repeatedly hitting the HMS during serde initializations diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java index fbdc36be3a19..9d74b1715cb0 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java @@ -65,6 +65,7 @@ import org.apache.iceberg.hadoop.HadoopConfigurable; import org.apache.iceberg.hadoop.HadoopFileIO; import org.apache.iceberg.hadoop.Util; +import org.apache.iceberg.hive.HiveOperationsBase; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.mapping.NameMapping; @@ -240,6 +241,10 @@ public static Table deserializeTable(Configuration config, String name) { if (table == null && config.getBoolean(hive_metastoreConstants.TABLE_IS_CTAS, false) && StringUtils.isNotBlank(location)) { + String type = config.get(InputFormatConfig.TABLE_TYPE); + if (HiveOperationsBase.ICEBERG_VIEW_TYPE_VALUE.equals(type)) { + location += Catalogs.MATERIALIZED_VIEW_STORAGE_TABLE_IDENTIFIER_SUFFIX; + } table = readTableObjectFromFile(location, config); } checkAndSetIoConfig(config, table); diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/mv_iceberg_orc9.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/mv_iceberg_orc9.q.out index 5a6f64b10da1..2ddb34254225 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/mv_iceberg_orc9.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/mv_iceberg_orc9.q.out @@ -461,11 +461,61 @@ Outdated for Rewriting: No # Materialized View Source table information Table name Snapshot default.tbl_ice SnapshotContext{snapshotId=#SnapshotId#} +PREHOOK: query: create external table tbl_ice_v2(d int, e string, f int) stored by iceberg stored as orc tblproperties ('format-version'='2') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl_ice_v2 +POSTHOOK: query: create external table tbl_ice_v2(d int, e string, f int) stored by iceberg stored as orc tblproperties ('format-version'='2') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl_ice_v2 +PREHOOK: query: insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five v2', 54) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl_ice_v2 +POSTHOOK: query: insert into tbl_ice_v2 values (1, 'one v2', 50), (4, 'four v2', 53), (5, 'five v2', 54) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl_ice_v2 +PREHOOK: query: create materialized view mat1_multiple_tables stored by iceberg as +select tbl_ice.b, tbl_ice.c, sum(tbl_ice_v2.f) +from tbl_ice +join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52 +group by tbl_ice.b, tbl_ice.c +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@tbl_ice +PREHOOK: Input: default@tbl_ice_v2 +PREHOOK: Output: database:default +PREHOOK: Output: default@mat1_multiple_tables +#### A masked pattern was here #### +POSTHOOK: query: create materialized view mat1_multiple_tables stored by iceberg as +select tbl_ice.b, tbl_ice.c, sum(tbl_ice_v2.f) +from tbl_ice +join tbl_ice_v2 on tbl_ice.a=tbl_ice_v2.d where tbl_ice.c > 52 +group by tbl_ice.b, tbl_ice.c +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@tbl_ice +POSTHOOK: Input: default@tbl_ice_v2 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mat1_multiple_tables +#### A masked pattern was here #### +POSTHOOK: Lineage: mat1_multiple_tables._c2 EXPRESSION [(tbl_ice_v2)tbl_ice_v2.FieldSchema(name:f, type:int, comment:null), ] +POSTHOOK: Lineage: mat1_multiple_tables.b SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:b, type:string, comment:null), ] +POSTHOOK: Lineage: mat1_multiple_tables.c SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), ] +PREHOOK: query: delete from tbl_ice_v2 where d = 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_ice_v2 +PREHOOK: Output: default@tbl_ice_v2 +POSTHOOK: query: delete from tbl_ice_v2 where d = 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_ice_v2 +POSTHOOK: Output: default@tbl_ice_v2 PREHOOK: query: SHOW MATERIALIZED VIEWS PREHOOK: type: SHOWMATERIALIZEDVIEWS POSTHOOK: query: SHOW MATERIALIZED VIEWS POSTHOOK: type: SHOWMATERIALIZEDVIEWS # MV Name Rewriting Enabled Mode Incremental rebuild +mat1_multiple_tables Yes Manual refresh Available for insert operations only mat1_orc Yes Manual refresh Available for insert operations only mat1_orc_partitioned Yes Manual refresh Available for insert operations only mat2_orc Yes Manual refresh Available for insert operations only diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index fa45fbbef703..c8e027b7c307 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.LlapOutputFormat; import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; +import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; @@ -365,6 +366,8 @@ public static TableDesc getTableDesc(CreateMaterializedViewDesc crtViewDesc, Str properties.setProperty( hive_metastoreConstants.META_TABLE_NAME, crtViewDesc.getViewName()); } + + properties.setProperty(hive_metastoreConstants.META_OBJECT_TYPE, TableType.MATERIALIZED_VIEW.name()); return ret; } diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp index 9f0b0c8cf8d4..582d66b801b3 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp @@ -57,6 +57,8 @@ hive_metastoreConstants::hive_metastoreConstants() { META_TABLE_SERDE = "serde"; + META_OBJECT_TYPE = "object_type"; + META_TABLE_PARTITION_COLUMNS = "partition_columns"; META_TABLE_PARTITION_COLUMN_TYPES = "partition_columns.types"; diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h index 504b54a01d99..0af288c06425 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h @@ -38,6 +38,7 @@ class hive_metastoreConstants { std::string META_TABLE_DB; std::string META_TABLE_LOCATION; std::string META_TABLE_SERDE; + std::string META_OBJECT_TYPE; std::string META_TABLE_PARTITION_COLUMNS; std::string META_TABLE_PARTITION_COLUMN_TYPES; std::string FILE_INPUT_FORMAT; diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java index f5a102ab9647..8fae1c8ea322 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java @@ -55,6 +55,8 @@ public static final java.lang.String META_TABLE_SERDE = "serde"; + public static final java.lang.String META_OBJECT_TYPE = "object_type"; + public static final java.lang.String META_TABLE_PARTITION_COLUMNS = "partition_columns"; public static final java.lang.String META_TABLE_PARTITION_COLUMN_TYPES = "partition_columns.types"; diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php index 84961065fd53..c71f411375fb 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php @@ -41,6 +41,7 @@ final class Constant extends \Thrift\Type\TConstant static protected $META_TABLE_DB; static protected $META_TABLE_LOCATION; static protected $META_TABLE_SERDE; + static protected $META_OBJECT_TYPE; static protected $META_TABLE_PARTITION_COLUMNS; static protected $META_TABLE_PARTITION_COLUMN_TYPES; static protected $FILE_INPUT_FORMAT; @@ -178,6 +179,11 @@ protected static function init_META_TABLE_SERDE() return "serde"; } + protected static function init_META_OBJECT_TYPE() + { + return "object_type"; + } + protected static function init_META_TABLE_PARTITION_COLUMNS() { return "partition_columns"; diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py index b5891397a6e2..9dd96549126f 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py @@ -35,6 +35,7 @@ META_TABLE_DB = "db" META_TABLE_LOCATION = "location" META_TABLE_SERDE = "serde" +META_OBJECT_TYPE = "object_type" META_TABLE_PARTITION_COLUMNS = "partition_columns" META_TABLE_PARTITION_COLUMN_TYPES = "partition_columns.types" FILE_INPUT_FORMAT = "file.inputformat" diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb index e7c30a2c4dc1..b68b99dbcd48 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb @@ -53,6 +53,8 @@ META_TABLE_SERDE = %q"serde" +META_OBJECT_TYPE = %q"object_type" + META_TABLE_PARTITION_COLUMNS = %q"partition_columns" META_TABLE_PARTITION_COLUMN_TYPES = %q"partition_columns.types" diff --git a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift index 962adb5fd3df..9b33d9baae3b 100644 --- a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift +++ b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift @@ -3368,6 +3368,7 @@ const string META_TABLE_NAME = "name", const string META_TABLE_DB = "db", const string META_TABLE_LOCATION = "location", const string META_TABLE_SERDE = "serde", +const string META_OBJECT_TYPE = "object_type", const string META_TABLE_PARTITION_COLUMNS = "partition_columns", const string META_TABLE_PARTITION_COLUMN_TYPES = "partition_columns.types", const string FILE_INPUT_FORMAT = "file.inputformat",