diff --git a/src/Client/BuzzHouse/Generator/TableSetttings.cpp b/src/Client/BuzzHouse/Generator/TableSetttings.cpp index e6c95744641f..15d4d1711ddb 100644 --- a/src/Client/BuzzHouse/Generator/TableSetttings.cpp +++ b/src/Client/BuzzHouse/Generator/TableSetttings.cpp @@ -349,6 +349,7 @@ static std::unordered_map ipTreeLayoutSettings = {{"ACCESS_TO static std::unordered_map dataLakeSettings = {{"allow_dynamic_metadata_for_data_lakes", CHSetting(trueOrFalse, {}, false)}, {"allow_experimental_delta_kernel_rs", CHSetting(trueOrFalse, {}, false)}, + {"allow_local_data_lakes", CHSetting(trueOrFalse, {}, false)}, {"iceberg_format_version", CHSetting([](RandomGenerator & rg) { return rg.nextBool() ? "1" : "2"; }, {}, false)}, {"iceberg_metadata_compression_method", CHSetting([](RandomGenerator & rg) { return "'" + rg.pickRandomly(compressionMethods) + "'"; }, {}, false)}, diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 4a5fc0821e8a..72de0663741c 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -6857,6 +6857,9 @@ Use roaring bitmap for iceberg positional deletes. )", 0) \ DECLARE(Bool, serialize_string_in_memory_with_zero_byte, true, R"( Serialize String values during aggregation with zero byte at the end. Enable to keep compatibility when querying cluster of incompatible versions. +)", 0) \ + DECLARE(Bool, allow_local_data_lakes, false, R"( +Allow using local data lake engines and table functions (IcebergLocal, DeltaLakeLocal, etc.). )", 0) \ \ /* ####################################################### */ \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index e506b470f068..5bd62541ff60 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -39,7 +39,11 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory() /// controls new feature and it's 'true' by default, use 'false' as previous_value). /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) /// Note: please check if the key already exists to prevent duplicate entries. - addSettingsChanges(settings_changes_history, "25.8.13.10000", + addSettingsChanges(settings_changes_history, "25.8.16.10002", + { + {"allow_local_data_lakes", false, false, "New setting to guard local data lake engines and table functions"}, + }); + addSettingsChanges(settings_changes_history, "25.8.13.10001", { {"show_data_lake_catalogs_in_system_tables", false, true, "Disable catalogs in system tables by default"}, diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index 708e9ae34b9b..c7279d14789e 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -23,10 +23,12 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int SUPPORT_IS_DISABLED; } namespace Setting { + extern const SettingsBool allow_local_data_lakes; extern const SettingsBool write_full_path_in_iceberg_metadata; } @@ -269,6 +271,11 @@ void registerStorageIceberg(StorageFactory & factory) IcebergLocalDefinition::storage_engine_name, [&](const StorageFactory::Arguments & args) { + if (!args.getLocalContext()->getSettingsRef()[Setting::allow_local_data_lakes]) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "IcebergLocal is disabled. Set `allow_local_data_lakes` to enable it"); + const auto storage_settings = getDataLakeStorageSettings(*args.storage_def); auto configuration = std::make_shared(storage_settings); return createStorageObjectStorage(args, configuration); @@ -338,6 +345,11 @@ void registerStorageDeltaLake(StorageFactory & factory) DeltaLakeLocalDefinition::storage_engine_name, [&](const StorageFactory::Arguments & args) { + if (!args.getLocalContext()->getSettingsRef()[Setting::allow_local_data_lakes]) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "DeltaLakeLocal is disabled. Set `allow_local_data_lakes` to enable it"); + const auto storage_settings = getDataLakeStorageSettings(*args.storage_def); auto configuration = std::make_shared(storage_settings); return createStorageObjectStorage(args, configuration); diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 959ac829a8e1..755f987def15 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -32,6 +32,7 @@ namespace DB namespace Setting { + extern const SettingsBool allow_local_data_lakes; extern const SettingsUInt64 allow_experimental_parallel_reading_from_replicas; extern const SettingsBool parallel_replicas_for_cluster_engines; extern const SettingsString cluster_for_parallel_replicas; @@ -41,6 +42,7 @@ namespace Setting namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int SUPPORT_IS_DISABLED; } template @@ -95,6 +97,15 @@ TableFunctionObjectStorage::createEmpty template void TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) { + if constexpr (std::is_same_v || std::is_same_v) + { + if (!context->getSettingsRef()[Setting::allow_local_data_lakes]) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "Table function '{}' is disabled. Set `allow_local_data_lakes` to enable it", + getName()); + } + /// Clone ast function, because we can modify its arguments like removing headers. auto ast_copy = ast_function->clone(); ASTs & args_func = ast_copy->children; diff --git a/tests/integration/test_storage_delta/configs/users.d/enable_writes.xml b/tests/integration/test_storage_delta/configs/users.d/enable_writes.xml index fc2a6bd782ce..b9b93243fc0b 100644 --- a/tests/integration/test_storage_delta/configs/users.d/enable_writes.xml +++ b/tests/integration/test_storage_delta/configs/users.d/enable_writes.xml @@ -2,6 +2,7 @@ 1 + 1 diff --git a/tests/integration/test_storage_iceberg/configs/users.d/users.xml b/tests/integration/test_storage_iceberg/configs/users.d/users.xml index 4b6ba057ecb1..aaae3e0d52e7 100644 --- a/tests/integration/test_storage_iceberg/configs/users.d/users.xml +++ b/tests/integration/test_storage_iceberg/configs/users.d/users.xml @@ -6,4 +6,9 @@ 1 + + + 1 + + diff --git a/tests/integration/test_storage_iceberg_schema_evolution/configs/users.d/users.xml b/tests/integration/test_storage_iceberg_schema_evolution/configs/users.d/users.xml index 4b6ba057ecb1..aaae3e0d52e7 100644 --- a/tests/integration/test_storage_iceberg_schema_evolution/configs/users.d/users.xml +++ b/tests/integration/test_storage_iceberg_schema_evolution/configs/users.d/users.xml @@ -6,4 +6,9 @@ 1 + + + 1 + + diff --git a/tests/integration/test_storage_iceberg_schema_evolution/test.py b/tests/integration/test_storage_iceberg_schema_evolution/test.py index 23cb2c1fa1cb..da8da5699938 100644 --- a/tests/integration/test_storage_iceberg_schema_evolution/test.py +++ b/tests/integration/test_storage_iceberg_schema_evolution/test.py @@ -38,7 +38,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/cluster.xml", "configs/config.d/named_collections.xml"], - user_configs=[], + user_configs=["configs/users.d/users.xml"], with_minio=True, with_azurite=True, stay_alive=True, diff --git a/tests/queries/0_stateless/03581_iceberg_parse_partition.sql b/tests/queries/0_stateless/03581_iceberg_parse_partition.sql index ba96468422c1..284edd2567ab 100644 --- a/tests/queries/0_stateless/03581_iceberg_parse_partition.sql +++ b/tests/queries/0_stateless/03581_iceberg_parse_partition.sql @@ -1,3 +1,4 @@ -- Tags: no-fasttest +SET allow_local_data_lakes = 1; CREATE TABLE t0 (c0 Nullable(Int)) ENGINE = IcebergLocal('/file0') PARTITION BY (`c0.null` IS NULL); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03784_bad_base_backup.sh b/tests/queries/0_stateless/03784_bad_base_backup.sh index d95c3fd6f10e..0923f6f480d9 100755 --- a/tests/queries/0_stateless/03784_bad_base_backup.sh +++ b/tests/queries/0_stateless/03784_bad_base_backup.sh @@ -22,6 +22,7 @@ function thread() $CLICKHOUSE_CLIENT --query=" SET allow_suspicious_low_cardinality_types = 1; + SET allow_local_data_lakes = 1; DROP DATABASE IF EXISTS d1_$CLICKHOUSE_DATABASE; DROP DATABASE IF EXISTS d2_$CLICKHOUSE_DATABASE;