Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow non-string arguments in concatWithSeparator() #59341

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/en/sql-reference/functions/string-functions.md
Expand Up @@ -515,7 +515,7 @@ Alias: `concat_ws`
**Arguments**

- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- exprN — expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- exprN — expression to be concatenated. Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments.

**Returned values**

Expand Down
18 changes: 9 additions & 9 deletions src/Functions/concat.cpp
Expand Up @@ -80,29 +80,29 @@ class ConcatImpl : public IFunction
const ColumnConst * c0_const_string = checkAndGetColumnConst<ColumnString>(c0);
const ColumnConst * c1_const_string = checkAndGetColumnConst<ColumnString>(c1);

auto c_res = ColumnString::create();
auto col_res = ColumnString::create();

if (c0_string && c1_string)
concat(StringSource(*c0_string), StringSource(*c1_string), StringSink(*c_res, c0->size()));
concat(StringSource(*c0_string), StringSource(*c1_string), StringSink(*col_res, c0->size()));
else if (c0_string && c1_const_string)
concat(StringSource(*c0_string), ConstSource<StringSource>(*c1_const_string), StringSink(*c_res, c0->size()));
concat(StringSource(*c0_string), ConstSource<StringSource>(*c1_const_string), StringSink(*col_res, c0->size()));
else if (c0_const_string && c1_string)
concat(ConstSource<StringSource>(*c0_const_string), StringSource(*c1_string), StringSink(*c_res, c0->size()));
concat(ConstSource<StringSource>(*c0_const_string), StringSource(*c1_string), StringSink(*col_res, c0->size()));
else
{
/// Fallback: use generic implementation for not very important cases.
return executeFormatImpl(arguments, input_rows_count);
}

return c_res;
return col_res;
}

ColumnPtr executeFormatImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
{
const size_t num_arguments = arguments.size();
assert(num_arguments >= 2);

auto c_res = ColumnString::create();
auto col_res = ColumnString::create();
std::vector<const ColumnString::Chars *> data(num_arguments);
std::vector<const ColumnString::Offsets *> offsets(num_arguments);
std::vector<size_t> fixed_string_sizes(num_arguments);
Expand Down Expand Up @@ -169,11 +169,11 @@ class ConcatImpl : public IFunction
offsets,
fixed_string_sizes,
constant_strings,
c_res->getChars(),
c_res->getOffsets(),
col_res->getChars(),
col_res->getOffsets(),
input_rows_count);

return c_res;
return col_res;
}
};

Expand Down
58 changes: 40 additions & 18 deletions src/Functions/concatWithSeparator.cpp
@@ -1,5 +1,6 @@
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnStringHelpers.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
Expand Down Expand Up @@ -27,7 +28,6 @@ class ConcatWithSeparatorImpl : public IFunction
public:
static constexpr auto name = Name::name;
explicit ConcatWithSeparatorImpl(ContextPtr context_) : context(context_) { }

static FunctionPtr create(ContextPtr context) { return std::make_shared<ConcatWithSeparatorImpl>(context); }

String getName() const override { return name; }
Expand All @@ -49,17 +49,13 @@ class ConcatWithSeparatorImpl : public IFunction
getName(),
arguments.size());

for (const auto arg_idx : collections::range(0, arguments.size()))
{
const auto * arg = arguments[arg_idx].get();
if (!isStringOrFixedString(arg))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument {} of function {}",
arg->getName(),
arg_idx + 1,
getName());
}
const auto * separator_arg = arguments[0].get();
if (!isStringOrFixedString(separator_arg))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of first argument of function {}",
separator_arg->getName(),
getName());

return std::make_shared<DataTypeString>();
}
Expand All @@ -70,8 +66,9 @@ class ConcatWithSeparatorImpl : public IFunction
if (arguments.size() == 1)
return result_type->createColumnConstWithDefaultValue(input_rows_count);

auto c_res = ColumnString::create();
c_res->reserve(input_rows_count);
auto col_res = ColumnString::create();
col_res->reserve(input_rows_count);

const ColumnConst * col_sep = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col_sep)
throw Exception(
Expand All @@ -88,6 +85,7 @@ class ConcatWithSeparatorImpl : public IFunction
std::vector<const ColumnString::Offsets *> offsets(num_args);
std::vector<size_t> fixed_string_sizes(num_args);
std::vector<std::optional<String>> constant_strings(num_args);
std::vector<ColumnString::MutablePtr> converted_col_ptrs(num_args);

bool has_column_string = false;
bool has_column_fixed_string = false;
Expand All @@ -111,9 +109,33 @@ class ConcatWithSeparatorImpl : public IFunction
fixed_string_sizes[2 * i] = fixed_col->getN();
}
else if (const ColumnConst * const_col = checkAndGetColumnConstStringOrFixedString(column.get()))
{
constant_strings[2 * i] = const_col->getValue<String>();
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column->getName(), getName());
{
/// A non-String/non-FixedString-type argument: use the default serialization to convert it to String
auto full_column = column->convertToFullIfNeeded();
auto serialization = arguments[i +1].type->getDefaultSerialization();
auto converted_col_str = ColumnString::create();
ColumnStringHelpers::WriteHelper write_helper(*converted_col_str, column->size());
auto & write_buffer = write_helper.getWriteBuffer();
FormatSettings format_settings;
for (size_t row = 0; row < column->size(); ++row)
{
serialization->serializeText(*full_column, row, write_buffer, format_settings);
write_helper.rowWritten();
}
write_helper.finalize();

/// Keep the pointer alive
converted_col_ptrs[i] = std::move(converted_col_str);

/// Same as the normal `ColumnString` branch
has_column_string = true;
data[2 * i] = &converted_col_ptrs[i]->getChars();
offsets[2 * i] = &converted_col_ptrs[i]->getOffsets();
}
}

String pattern;
Expand All @@ -129,10 +151,10 @@ class ConcatWithSeparatorImpl : public IFunction
offsets,
fixed_string_sizes,
constant_strings,
c_res->getChars(),
c_res->getOffsets(),
col_res->getChars(),
col_res->getOffsets(),
input_rows_count);
return std::move(c_res);
return std::move(col_res);
}

private:
Expand Down
4 changes: 2 additions & 2 deletions src/Functions/formatString.h
Expand Up @@ -18,7 +18,7 @@ struct FormatStringImpl
static constexpr size_t right_padding = 15;

template <typename... Args>
static inline void formatExecute(bool possibly_has_column_string, bool possibly_has_column_fixed_string, Args &&... args)
static void formatExecute(bool possibly_has_column_string, bool possibly_has_column_fixed_string, Args &&... args)
{
if (possibly_has_column_string && possibly_has_column_fixed_string)
format<true, true>(std::forward<Args>(args)...);
Expand All @@ -38,7 +38,7 @@ struct FormatStringImpl
/// input_rows_count is the number of rows processed.
/// Precondition: data.size() == offsets.size() == fixed_string_N.size() == constant_strings.size().
template <bool has_column_string, bool has_column_fixed_string>
static inline void format(
static void format(
String pattern,
const std::vector<const ColumnString::Chars *> & data,
const std::vector<const ColumnString::Offsets *> & offsets,
Expand Down
39 changes: 39 additions & 0 deletions tests/queries/0_stateless/02495_concat_with_separator.reference
Expand Up @@ -14,6 +14,45 @@
1
1
1
1
1
\N
\N
\N
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
99 changes: 72 additions & 27 deletions tests/queries/0_stateless/02495_concat_with_separator.sql
@@ -1,27 +1,72 @@
select concatWithSeparator('|', 'a', 'b') == 'a|b';
select concatWithSeparator('|', 'a', materialize('b')) == 'a|b';
select concatWithSeparator('|', materialize('a'), 'b') == 'a|b';
select concatWithSeparator('|', materialize('a'), materialize('b')) == 'a|b';

select concatWithSeparator('|', 'a', toFixedString('b', 1)) == 'a|b';
select concatWithSeparator('|', 'a', materialize(toFixedString('b', 1))) == 'a|b';
select concatWithSeparator('|', materialize('a'), toFixedString('b', 1)) == 'a|b';
select concatWithSeparator('|', materialize('a'), materialize(toFixedString('b', 1))) == 'a|b';

select concatWithSeparator('|', toFixedString('a', 1), 'b') == 'a|b';
select concatWithSeparator('|', toFixedString('a', 1), materialize('b')) == 'a|b';
select concatWithSeparator('|', materialize(toFixedString('a', 1)), 'b') == 'a|b';
select concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize('b')) == 'a|b';

select concatWithSeparator('|', toFixedString('a', 1), toFixedString('b', 1)) == 'a|b';
select concatWithSeparator('|', toFixedString('a', 1), materialize(toFixedString('b', 1))) == 'a|b';
select concatWithSeparator('|', materialize(toFixedString('a', 1)), toFixedString('b', 1)) == 'a|b';
select concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize(toFixedString('b', 1))) == 'a|b';

select concatWithSeparator(null, 'a', 'b') == null;
select concatWithSeparator('1', null, 'b') == null;
select concatWithSeparator('1', 'a', null) == null;

select concatWithSeparator(materialize('|'), 'a', 'b'); -- { serverError 44 }
select concatWithSeparator(); -- { serverError 42 }
select concatWithSeparator('|', 'a', 100); -- { serverError 43 }
SET allow_suspicious_low_cardinality_types=1;

-- negative tests
SELECT concatWithSeparator(materialize('|'), 'a', 'b'); -- { serverError ILLEGAL_COLUMN }
SELECT concatWithSeparator(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }

-- special cases
SELECT concatWithSeparator('|') = '';
SELECT concatWithSeparator('|', 'a') == 'a';

SELECT concatWithSeparator('|', 'a', 'b') == 'a|b';
SELECT concatWithSeparator('|', 'a', materialize('b')) == 'a|b';
SELECT concatWithSeparator('|', materialize('a'), 'b') == 'a|b';
SELECT concatWithSeparator('|', materialize('a'), materialize('b')) == 'a|b';

SELECT concatWithSeparator('|', 'a', toFixedString('b', 1)) == 'a|b';
SELECT concatWithSeparator('|', 'a', materialize(toFixedString('b', 1))) == 'a|b';
SELECT concatWithSeparator('|', materialize('a'), toFixedString('b', 1)) == 'a|b';
SELECT concatWithSeparator('|', materialize('a'), materialize(toFixedString('b', 1))) == 'a|b';

SELECT concatWithSeparator('|', toFixedString('a', 1), 'b') == 'a|b';
SELECT concatWithSeparator('|', toFixedString('a', 1), materialize('b')) == 'a|b';
SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), 'b') == 'a|b';
SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize('b')) == 'a|b';

SELECT concatWithSeparator('|', toFixedString('a', 1), toFixedString('b', 1)) == 'a|b';
SELECT concatWithSeparator('|', toFixedString('a', 1), materialize(toFixedString('b', 1))) == 'a|b';
SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), toFixedString('b', 1)) == 'a|b';
SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize(toFixedString('b', 1))) == 'a|b';

SELECT concatWithSeparator(null, 'a', 'b') == null;
SELECT concatWithSeparator('1', null, 'b') == null;
SELECT concatWithSeparator('1', 'a', null) == null;

-- Const String + non-const non-String/non-FixedString type'
SELECT concatWithSeparator('|', 'a', materialize(42 :: Int8)) == 'a|42';
SELECT concatWithSeparator('|', 'a', materialize(43 :: Int16)) == 'a|43';
SELECT concatWithSeparator('|', 'a', materialize(44 :: Int32)) == 'a|44';
SELECT concatWithSeparator('|', 'a', materialize(45 :: Int64)) == 'a|45';
SELECT concatWithSeparator('|', 'a', materialize(46 :: Int128)) == 'a|46';
SELECT concatWithSeparator('|', 'a', materialize(47 :: Int256)) == 'a|47';
SELECT concatWithSeparator('|', 'a', materialize(48 :: UInt8)) == 'a|48';
SELECT concatWithSeparator('|', 'a', materialize(49 :: UInt16)) == 'a|49';
SELECT concatWithSeparator('|', 'a', materialize(50 :: UInt32)) == 'a|50';
SELECT concatWithSeparator('|', 'a', materialize(51 :: UInt64)) == 'a|51';
SELECT concatWithSeparator('|', 'a', materialize(52 :: UInt128)) == 'a|52';
SELECT concatWithSeparator('|', 'a', materialize(53 :: UInt256)) == 'a|53';
SELECT concatWithSeparator('|', 'a', materialize(42.42 :: Float32)) == 'a|42.42';
SELECT concatWithSeparator('|', 'a', materialize(43.43 :: Float64)) == 'a|43.43';
SELECT concatWithSeparator('|', 'a', materialize(44.44 :: Decimal(2))) == 'a|44';
SELECT concatWithSeparator('|', 'a', materialize(true :: Bool)) == 'a|true';
SELECT concatWithSeparator('|', 'a', materialize(false :: Bool)) == 'a|false';
SELECT concatWithSeparator('|', 'a', materialize('foo' :: String)) == 'a|foo';
SELECT concatWithSeparator('|', 'a', materialize('bar' :: FixedString(3))) == 'a|bar';
SELECT concatWithSeparator('|', 'a', materialize('foo' :: Nullable(String))) == 'a|foo';
SELECT concatWithSeparator('|', 'a', materialize('bar' :: Nullable(FixedString(3)))) == 'a|bar';
SELECT concatWithSeparator('|', 'a', materialize('foo' :: LowCardinality(String))) == 'a|foo';
SELECT concatWithSeparator('|', 'a', materialize('bar' :: LowCardinality(FixedString(3)))) == 'a|bar';
SELECT concatWithSeparator('|', 'a', materialize('foo' :: LowCardinality(Nullable(String)))) == 'a|foo';
SELECT concatWithSeparator('|', 'a', materialize('bar' :: LowCardinality(Nullable(FixedString(3))))) == 'a|bar';
SELECT concatWithSeparator('|', 'a', materialize(42 :: LowCardinality(Nullable(UInt32)))) == 'a|42';
SELECT concatWithSeparator('|', 'a', materialize(42 :: LowCardinality(UInt32))) == 'a|42';
SELECT concatWithSeparator('|', 'a', materialize('fae310ca-d52a-4923-9e9b-02bf67f4b009' :: UUID)) == 'a|fae310ca-d52a-4923-9e9b-02bf67f4b009';
SELECT concatWithSeparator('|', 'a', materialize('2023-11-14' :: Date)) == 'a|2023-11-14';
SELECT concatWithSeparator('|', 'a', materialize('2123-11-14' :: Date32)) == 'a|2123-11-14';
SELECT concatWithSeparator('|', 'a', materialize('2023-11-14 05:50:12' :: DateTime('Europe/Amsterdam'))) == 'a|2023-11-14 05:50:12';
SELECT concatWithSeparator('|', 'a', materialize('hallo' :: Enum('hallo' = 1))) == 'a|hallo';
SELECT concatWithSeparator('|', 'a', materialize(['foo', 'bar'] :: Array(String))) == 'a|[\'foo\',\'bar\']';
SELECT concatWithSeparator('|', 'a', materialize((42, 'foo') :: Tuple(Int32, String))) == 'a|(42,\'foo\')';
SELECT concatWithSeparator('|', 'a', materialize(map(42, 'foo') :: Map(Int32, String))) == 'a|{42:\'foo\'}';
SELECT concatWithSeparator('|', 'a', materialize('122.233.64.201' :: IPv4)) == 'a|122.233.64.201';
SELECT concatWithSeparator('|', 'a', materialize('2001:0001:130F:0002:0003:09C0:876A:130B' :: IPv6)) == 'a|2001:0001:130F:0002:0003:09C0:876A:130B';