From 3844ea1b990779f47ee034723c7de6d98aac14ec Mon Sep 17 00:00:00 2001 From: "Daniel Q. Kim" Date: Fri, 5 Jun 2026 08:48:25 +0200 Subject: [PATCH] Antalya 26.3: Fix empty partition_key and sorting_key in system.tables for Iceberg tables without data snapshots Changelog category: Bug Fix Changelog entry: Fixed `system.tables.partition_key` and `system.tables.sorting_key` returning empty strings for Iceberg tables that have no data snapshot, including all empty tables and (more frequently) tables accessed via the Glue catalog. The snapshot-existence gate in IcebergMetadata::partitionKey() / sortingKey() was semantically wrong: partition spec and sort order are table-level properties recorded at the top level of the Iceberg metadata file (`default-spec-id`, `default-sort-order-id`) and exist independently of whether any data snapshot has been written. Also adds a defensive guard in getSortingKeyDescriptionFromMetadata against Iceberg V1 metadata files that are missing `default-sort-order-id` or `sort-orders` (both are optional before V2); that code path becomes reachable for empty tables after this fix. Closes #1235. 
--- .../ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp | 4 ---- src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp index 95daff72ea11..6016be1cd341 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp @@ -1156,16 +1156,12 @@ std::optional IcebergMetadata::totalBytes(ContextPtr local_context) cons std::optional IcebergMetadata::partitionKey(ContextPtr context) const { auto [actual_data_snapshot, actual_table_state_snapshot] = getRelevantState(context); - if (!actual_data_snapshot) - return std::nullopt; return getPartitionKey(context, actual_table_state_snapshot); } std::optional IcebergMetadata::sortingKey(ContextPtr context) const { auto [actual_data_snapshot, actual_table_state_snapshot] = getRelevantState(context); - if (!actual_data_snapshot) - return std::nullopt; auto metadata_object = getMetadataJSONObject( actual_table_state_snapshot.metadata_file_path, object_storage, diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp index 1e2afa3ee106..8564eebda3ef 100644 --- a/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp @@ -1324,6 +1324,11 @@ std::pair parseTableSchemaV1Method(const Poco::J KeyDescription getSortingKeyDescriptionFromMetadata(Poco::JSON::Object::Ptr metadata_object, const NamesAndTypesList & ch_schema, ContextPtr local_context) { + // sort-orders / default-sort-order-id are optional in Iceberg V1 metadata + // (required only from V2); an unsorted table uses the no-op order-id 0. + // Treat their absence as "no sort order" rather than dereferencing a missing field. 
+ if (!metadata_object->has(f_default_sort_order_id) || !metadata_object->has(f_sort_orders)) + return KeyDescription{}; auto sort_order_id = metadata_object->getValue(f_default_sort_order_id); Poco::JSON::Array::Ptr sort_orders = metadata_object->getArray(f_sort_orders); std::unordered_map source_id_to_column_name;