From 1f4398d6bcc05c028f66337e93e1d9b2e3029a4f Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 11 Sep 2023 14:55:37 +0000 Subject: [PATCH 1/4] Fix parsing error in WithNames formats while reading subset of columns with disabled input_format_with_names_use_header --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ++-- src/Formats/FormatFactory.cpp | 17 +++++++++++++---- src/Formats/FormatFactory.h | 12 +++++++++--- src/Formats/registerWithNamesAndTypes.cpp | 5 +++-- src/Interpreters/Context.cpp | 4 ++-- src/Storages/HDFS/StorageHDFS.cpp | 6 +++--- src/Storages/HDFS/StorageHDFS.h | 2 +- src/Storages/Hive/StorageHive.h | 2 +- src/Storages/IStorage.h | 2 -- src/Storages/StorageAzureBlob.cpp | 6 +++--- src/Storages/StorageAzureBlob.h | 2 +- src/Storages/StorageFile.cpp | 6 +++--- src/Storages/StorageFile.h | 2 +- src/Storages/StorageS3.cpp | 6 +++--- src/Storages/StorageS3.h | 4 +--- src/Storages/StorageURL.cpp | 8 ++++---- src/Storages/StorageURL.h | 2 +- src/Storages/StorageXDBC.cpp | 2 +- src/Storages/StorageXDBC.h | 2 +- src/TableFunctions/ITableFunction.h | 2 +- src/TableFunctions/ITableFunctionFileLike.cpp | 4 ++-- src/TableFunctions/ITableFunctionFileLike.h | 2 +- .../TableFunctionAzureBlobStorage.cpp | 4 ++-- .../TableFunctionAzureBlobStorage.h | 2 +- src/TableFunctions/TableFunctionS3.cpp | 4 ++-- src/TableFunctions/TableFunctionS3.h | 2 +- ...formats_with_names_dont_use_header.reference | 1 + .../02876_formats_with_names_dont_use_header.sh | 10 ++++++++++ ...with_names_dont_use_header_test.csvwithnames | 2 ++ 29 files changed, 76 insertions(+), 51 deletions(-) create mode 100644 tests/queries/0_stateless/02876_formats_with_names_dont_use_header.reference create mode 100755 tests/queries/0_stateless/02876_formats_with_names_dont_use_header.sh create mode 100644 tests/queries/0_stateless/02876_formats_with_names_dont_use_header_test.csvwithnames diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 
abf02547ccd1..1aa50340d6f7 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6321,9 +6321,9 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, { /// For input function we should check if input format supports reading subset of columns. if (table_function_ptr->getName() == "input") - use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat()); + use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat(), scope.context); else - use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(); + use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(scope.context); } if (use_columns_from_insert_query) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 663b7f1ba950..59b6513cb9b1 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -678,10 +678,18 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name) { - auto & target = dict[name].supports_subset_of_columns; + auto & target = dict[name].subset_of_columns_support_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name); - target = true; + target = [](const FormatSettings &){ return true; }; +} + +void FormatFactory::registerSubsetOfColumnsSupportChecker(const String & name, SubsetOfColumnsSupportChecker subset_of_columns_support_checker) +{ + auto & target = dict[name].subset_of_columns_support_checker; + if (target) + throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name); + target = std::move(subset_of_columns_support_checker); 
} void FormatFactory::markFormatSupportsSubcolumns(const String & name) @@ -706,10 +714,11 @@ bool FormatFactory::checkIfFormatSupportsSubcolumns(const String & name) const return target.supports_subcolumns; } -bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name) const +bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const DB::String & name, const ContextPtr & context, const std::optional & format_settings_) const { const auto & target = getCreators(name); - return target.supports_subset_of_columns; + auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context); + return target.subset_of_columns_support_checker && target.subset_of_columns_support_checker(format_settings); } void FormatFactory::registerAdditionalInfoForSchemaCacheGetter( diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 489db944ee6c..6f53d42cccac 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -126,6 +126,10 @@ class FormatFactory final : private boost::noncopyable /// and the name of the message. using AdditionalInfoForSchemaCacheGetter = std::function; + /// Some formats can support reading subset of columns depending on settings. + /// The checker should return true if the format supports reading a subset of columns with the given settings. 
+ using SubsetOfColumnsSupportChecker = std::function; + struct Creators { InputCreator input_creator; @@ -136,11 +140,11 @@ class FormatFactory final : private boost::noncopyable ExternalSchemaReaderCreator external_schema_reader_creator; bool supports_parallel_formatting{false}; bool supports_subcolumns{false}; - bool supports_subset_of_columns{false}; bool prefers_large_blocks{false}; NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker; AppendSupportChecker append_support_checker; AdditionalInfoForSchemaCacheGetter additional_info_for_schema_cache_getter; + SubsetOfColumnsSupportChecker subset_of_columns_support_checker; }; using FormatsDictionary = std::unordered_map; @@ -229,10 +233,12 @@ class FormatFactory final : private boost::noncopyable void markOutputFormatSupportsParallelFormatting(const String & name); void markOutputFormatPrefersLargeBlocks(const String & name); void markFormatSupportsSubcolumns(const String & name); - void markFormatSupportsSubsetOfColumns(const String & name); bool checkIfFormatSupportsSubcolumns(const String & name) const; - bool checkIfFormatSupportsSubsetOfColumns(const String & name) const; + + void markFormatSupportsSubsetOfColumns(const String & name); + void registerSubsetOfColumnsSupportChecker(const String & name, SubsetOfColumnsSupportChecker subset_of_columns_support_checker); + bool checkIfFormatSupportsSubsetOfColumns(const String & name, const ContextPtr & context, const std::optional & format_settings_ = std::nullopt) const; bool checkIfFormatHasSchemaReader(const String & name) const; bool checkIfFormatHasExternalSchemaReader(const String & name) const; diff --git a/src/Formats/registerWithNamesAndTypes.cpp b/src/Formats/registerWithNamesAndTypes.cpp index 2dee107844dc..674865a3bed3 100644 --- a/src/Formats/registerWithNamesAndTypes.cpp +++ b/src/Formats/registerWithNamesAndTypes.cpp @@ -12,8 +12,9 @@ void registerWithNamesAndTypes(const std::string & base_format_name, RegisterWit void 
markFormatWithNamesAndTypesSupportsSamplingColumns(const std::string & base_format_name, FormatFactory & factory) { - factory.markFormatSupportsSubsetOfColumns(base_format_name + "WithNames"); - factory.markFormatSupportsSubsetOfColumns(base_format_name + "WithNamesAndTypes"); + auto setting_checker = [](const FormatSettings & settings){ return settings.with_names_use_header; }; + factory.registerSubsetOfColumnsSupportChecker(base_format_name + "WithNames", setting_checker); + factory.registerSubsetOfColumnsSupportChecker(base_format_name + "WithNamesAndTypes", setting_checker); } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index f83e524ffb9d..5be014a18f85 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1703,9 +1703,9 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const { /// For input function we should check if input format supports reading subset of columns. if (table_function_ptr->getName() == "input") - use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat()); + use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat(), shared_from_this()); else - use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(); + use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(shared_from_this()); } if (use_columns_from_insert_query) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index fa6cfd824e79..3b87fa29af3e 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -719,9 +719,9 @@ class PartitionedHDFSSink : public PartitionedSink }; -bool StorageHDFS::supportsSubsetOfColumns() const +bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const { - return format_name != "Distributed" && 
FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); } Pipe StorageHDFS::read( @@ -770,7 +770,7 @@ Pipe StorageHDFS::read( ColumnsDescription columns_description; Block block_for_format; - if (supportsSubsetOfColumns()) + if (supportsSubsetOfColumns(context_)) { auto fetch_columns = column_names; const auto & virtuals = getVirtuals(); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index b248a37a83d9..1af55a959d7f 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -74,7 +74,7 @@ class StorageHDFS final : public IStorage, WithContext /// Is is useful because column oriented formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. - bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns(const ContextPtr & context_) const; static ColumnsDescription getTableStructureFromData( const String & format, diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 604df70f4d0e..a3c47d400e21 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -65,7 +65,7 @@ class StorageHive final : public IStorage, WithContext NamesAndTypesList getVirtuals() const override; - bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns() const; std::optional totalRows(const Settings & settings) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context_) const override; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ec92f57aeda5..1b738dd8d8cd 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -619,8 +619,6 @@ class IStorage : public 
std::enable_shared_from_this, public TypePromo /// NOTE: write-once also does not support INSERTs/merges/... for MergeTree virtual bool isStaticStorage() const; - virtual bool supportsSubsetOfColumns() const { return false; } - /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. /// Used for: /// - Simple count() optimization diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 365de2611ce0..d06838b639c8 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -641,7 +641,7 @@ Pipe StorageAzureBlob::read( ColumnsDescription columns_description; Block block_for_format; - if (supportsSubsetOfColumns()) + if (supportsSubsetOfColumns(local_context)) { auto fetch_columns = column_names; const auto & virtuals = getVirtuals(); @@ -767,9 +767,9 @@ bool StorageAzureBlob::supportsSubcolumns() const return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); } -bool StorageAzureBlob::supportsSubsetOfColumns() const +bool StorageAzureBlob::supportsSubsetOfColumns(const ContextPtr & context) const { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); } bool StorageAzureBlob::prefersLargeBlocks() const diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index ad87da1f61aa..923e5512cfb0 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -95,7 +95,7 @@ class StorageAzureBlob : public IStorage bool supportsSubcolumns() const override; - bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool prefersLargeBlocks() const override; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7e5e9d2b38c2..a2cbc675f39d 100644 
--- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -526,9 +526,9 @@ ColumnsDescription StorageFile::getTableStructureFromFile( return columns; } -bool StorageFile::supportsSubsetOfColumns() const +bool StorageFile::supportsSubsetOfColumns(const ContextPtr & context) const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); + return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context, format_settings); } bool StorageFile::prefersLargeBlocks() const @@ -949,7 +949,7 @@ Pipe StorageFile::read( { ColumnsDescription columns_description; Block block_for_format; - if (supportsSubsetOfColumns()) + if (supportsSubsetOfColumns(context)) { auto fetch_columns = column_names; const auto & virtuals = getVirtuals(); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ed50ae73e517..3d2fa444744c 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -73,7 +73,7 @@ class StorageFile final : public IStorage /// Is is useful because such formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
- bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool prefersLargeBlocks() const override; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index b52150250b8b..aaabf736de27 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1002,9 +1002,9 @@ bool StorageS3::supportsSubcolumns() const return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); } -bool StorageS3::supportsSubsetOfColumns() const +bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); } bool StorageS3::prefersLargeBlocks() const @@ -1047,7 +1047,7 @@ Pipe StorageS3::read( ColumnsDescription columns_description; Block block_for_format; - if (supportsSubsetOfColumns()) + if (supportsSubsetOfColumns(local_context)) { auto fetch_columns = column_names; const auto & virtuals = getVirtuals(); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index d001a86842e4..a245c957d599 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -352,7 +352,7 @@ class StorageS3 : public IStorage bool supportsSubcolumns() const override; - bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool prefersLargeBlocks() const override; @@ -372,8 +372,6 @@ class StorageS3 : public IStorage const String & format_name, const std::optional & format_settings, const ContextPtr & ctx); -}; - } #endif diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 41eb18ab5416..dabe13834d88 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -669,9 +669,9 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( return columns; } 
-bool IStorageURLBase::supportsSubsetOfColumns() const +bool IStorageURLBase::supportsSubsetOfColumns(const ContextPtr & context) const { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context, format_settings); } bool IStorageURLBase::prefersLargeBlocks() const @@ -697,7 +697,7 @@ Pipe IStorageURLBase::read( ColumnsDescription columns_description; Block block_for_format; - if (supportsSubsetOfColumns()) + if (supportsSubsetOfColumns(local_context)) { columns_description = storage_snapshot->getDescriptionForColumns(column_names); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); @@ -800,7 +800,7 @@ Pipe StorageURLWithFailover::read( { ColumnsDescription columns_description; Block block_for_format; - if (supportsSubsetOfColumns()) + if (supportsSubsetOfColumns(local_context)) { columns_description = storage_snapshot->getDescriptionForColumns(column_names); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 68fd4014ac13..598125bc1140 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -105,7 +105,7 @@ class IStorageURLBase : public IStorage QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; - bool supportsSubsetOfColumns() const override; + virtual bool supportsSubsetOfColumns(const ContextPtr & context) const; bool prefersLargeBlocks() const override; diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index b532d1c91f0f..a357876b84cf 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -145,7 +145,7 @@ SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMet compression_method); } -bool StorageXDBC::supportsSubsetOfColumns() const +bool 
StorageXDBC::supportsSubsetOfColumns(const ContextPtr &) const { return true; } diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index d7a1138c7100..1c1651cb3338 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -68,7 +68,7 @@ class StorageXDBC : public IStorageURLBase Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const override; - bool supportsSubsetOfColumns() const override; + bool supportsSubsetOfColumns(const ContextPtr &) const override; }; } diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index fe71005cb9c8..8f50314eaff9 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -76,7 +76,7 @@ class ITableFunction : public std::enable_shared_from_this /// because we cannot determine which column from table correspond to this virtual column. virtual std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const { return {}; } - virtual bool supportsReadingSubsetOfColumns() { return true; } + virtual bool supportsReadingSubsetOfColumns(const ContextPtr &) { return true; } /// Create storage according to the query. 
StoragePtr diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index a60ab70d5704..af0f49a1cc8f 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -32,9 +32,9 @@ String ITableFunctionFileLike::getFormatFromFirstArgument() return FormatFactory::instance().getFormatFromFileName(filename, true); } -bool ITableFunctionFileLike::supportsReadingSubsetOfColumns() +bool ITableFunctionFileLike::supportsReadingSubsetOfColumns(const ContextPtr & context) { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format, context); } void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context) diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 8300cc275916..41f08de84821 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -27,7 +27,7 @@ class ITableFunctionFileLike : public ITableFunction void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - bool supportsReadingSubsetOfColumns() override; + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; static size_t getMaxNumberOfArguments() { return 4; } diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index d2a961734919..3707e7d5196d 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -208,9 +208,9 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex return parseColumnsListFromString(configuration.structure, context); } -bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns() +bool 
TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); } StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h index 0ac3f9771c75..e00fb6bb19b0 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.h @@ -39,7 +39,7 @@ class TableFunctionAzureBlobStorage : public ITableFunction void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - bool supportsReadingSubsetOfColumns() override; + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 0f3078b1ca62..504318b74a83 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -325,9 +325,9 @@ ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) return parseColumnsListFromString(configuration.structure, context); } -bool TableFunctionS3::supportsReadingSubsetOfColumns() +bool TableFunctionS3::supportsReadingSubsetOfColumns(const ContextPtr & context) { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); } StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & 
table_name, ColumnsDescription /*cached_columns*/) const diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index d308f4692363..82de62e386d9 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -47,7 +47,7 @@ class TableFunctionS3 : public ITableFunction void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - bool supportsReadingSubsetOfColumns() override; + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { diff --git a/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.reference b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.reference new file mode 100644 index 000000000000..0cfbf08886fc --- /dev/null +++ b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.sh b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.sh new file mode 100755 index 000000000000..ce06ff530b9b --- /dev/null +++ b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo -e "a,b,c\n1,2,3" > $CLICKHOUSE_TEST_UNIQUE_NAME.csvwithnames + +$CLICKHOUSE_LOCAL -q "select b from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csvwithnames') settings input_format_with_names_use_header=0" + diff --git a/tests/queries/0_stateless/02876_formats_with_names_dont_use_header_test.csvwithnames b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header_test.csvwithnames new file mode 100644 index 000000000000..bfde6bfa0b87 --- /dev/null +++ b/tests/queries/0_stateless/02876_formats_with_names_dont_use_header_test.csvwithnames @@ -0,0 +1,2 @@ +a,b,c +1,2,3 From 5714f1fd3bc6313a5add8130090ced274951f619 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 11 Sep 2023 14:58:02 +0000 Subject: [PATCH 2/4] Remove unused field --- src/Formats/FormatFactory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 6f53d42cccac..5dffd2999b32 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -139,7 +139,6 @@ class FormatFactory final : private boost::noncopyable SchemaReaderCreator schema_reader_creator; ExternalSchemaReaderCreator external_schema_reader_creator; bool supports_parallel_formatting{false}; - bool supports_subcolumns{false}; bool prefers_large_blocks{false}; NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker; AppendSupportChecker append_support_checker; From b142e42eee4ec0c70672503944c6a27abc34e781 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 14 Sep 2023 14:03:30 +0200 Subject: [PATCH 3/4] Fix bad conflict resolve --- src/Formats/FormatFactory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 5dffd2999b32..a4e29c6bb84f 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -138,6 +138,7 @@ class FormatFactory final : private boost::noncopyable FileSegmentationEngine 
file_segmentation_engine; SchemaReaderCreator schema_reader_creator; ExternalSchemaReaderCreator external_schema_reader_creator; + bool supports_subcolumns{false}; bool supports_parallel_formatting{false}; bool prefers_large_blocks{false}; NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker; From 812a3e63b50b7d5d00272801114f7cd3bac26c64 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 14 Sep 2023 14:05:07 +0200 Subject: [PATCH 4/4] Fix bad conflict resolve --- src/Storages/StorageS3.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index a245c957d599..039c3364c22d 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -372,6 +372,8 @@ class StorageS3 : public IStorage const String & format_name, const std::optional & format_settings, const ContextPtr & ctx); +}; + } #endif