Skip to content

Commit

Permalink
Merge pull request #49960 from Avogar/fix-tsv-nullable-parsing
Browse files Browse the repository at this point in the history
Fix possible Logical error on bad Nullable parsing for text formats
  • Loading branch information
Avogar committed May 23, 2023
2 parents 141a72d + bf19765 commit 0e346c7
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 1 deletion.
8 changes: 8 additions & 0 deletions src/DataTypes/Serializations/SerializationNullable.cpp
Expand Up @@ -355,6 +355,9 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
/// It can happen only if there is a string instead of a number
/// or if someone uses tab or LF in TSV null_representation.
/// In the first case we cannot continue reading anyway. The second case seems to be unlikely.
/// We also should delete incorrectly deserialized value from nested column.
nested_column.popBack(1);

if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos)
throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
"containing '\\t' or '\\n' may not work correctly for large input.");
Expand Down Expand Up @@ -447,6 +450,8 @@ ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, Re

/// We have some unread data in PeekableReadBuffer own memory.
/// It can happen only if there is an unquoted string instead of a number.
/// We also should delete incorrectly deserialized value from nested column.
nested_column.popBack(1);
throw DB::ParsingException(
ErrorCodes::CANNOT_READ_ALL_DATA,
"Error while parsing Nullable: got an unquoted string {} instead of a number",
Expand Down Expand Up @@ -579,6 +584,9 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
/// It can happen only if there is an unquoted string instead of a number
/// or if someone uses csv delimiter, LF or CR in CSV null representation.
/// In the first case we cannot continue reading anyway. The second case seems to be unlikely.
/// We also should delete incorrectly deserialized value from nested column.
nested_column.popBack(1);

if (null_representation.find(settings.csv.delimiter) != std::string::npos || null_representation.find('\r') != std::string::npos
|| null_representation.find('\n') != std::string::npos)
throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "CSV custom null representation containing "
Expand Down
Expand Up @@ -44,7 +44,7 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
format_settings_,
std::make_unique<CustomSeparatedFormatReader>(*buf_, ignore_spaces_, format_settings_),
format_settings_.custom.try_detect_header)
, buf(std::move(buf_))
, buf(std::move(buf_)), ignore_spaces(ignore_spaces_)
{
/// In case of CustomSeparatedWithNames(AndTypes) formats and enabled setting input_format_with_names_use_header we don't know
/// the exact number of columns in data (because it can contain unknown columns). So, if field_delimiter and row_after_delimiter are
Expand Down
@@ -0,0 +1,3 @@
1 \N
1 \N
1 \N
@@ -0,0 +1,15 @@
#!/usr/bin/env bash

# NOTE: this sh wrapper is required because of shell_config

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

$CLICKHOUSE_CLIENT -q "drop table if exists test"
$CLICKHOUSE_CLIENT -q "create table test (x UInt32, y Nullable(UInt32)) engine=MergeTree order by x"
$CLICKHOUSE_CLIENT -q "select '1\t\\\N\n2\t\\\' format RawBLOB" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_allow_errors_num=1 format TSV"
$CLICKHOUSE_CLIENT -q "select '1,\\\N\n2,\\\' format RawBLOB" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_allow_errors_num=1 format CSV"
$CLICKHOUSE_CLIENT -q "select '1\tNULL\n2\tN' format RawBLOB" | $CLICKHOUSE_CLIENT -q "insert into test settings input_format_allow_errors_num=2, format_custom_escaping_rule='Quoted' format CustomSeparated"
$CLICKHOUSE_CLIENT -q "select * from test"
$CLICKHOUSE_CLIENT -q "drop table test";

0 comments on commit 0e346c7

Please sign in to comment.