Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Maxsplit argument for splitByChar. #34140

Merged
merged 5 commits into from
Feb 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
71 changes: 69 additions & 2 deletions src/Functions/FunctionsStringArray.h
Expand Up @@ -26,6 +26,7 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}


Expand Down Expand Up @@ -69,6 +70,8 @@ class AlphaTokensImpl
/// Name constant returned by getName().
static constexpr auto name = "alphaTokens";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly one argument is expected.
static size_t getNumberOfArguments()
{
    return 1;
}

/// Check the type of the function's arguments.
Expand Down Expand Up @@ -127,6 +130,7 @@ class SplitByNonAlphaImpl
/// Name constant returned by getName().
static constexpr auto name = "splitByNonAlpha";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly one argument is expected.
static size_t getNumberOfArguments()
{
    return 1;
}

/// Check the type of the function's arguments.
Expand Down Expand Up @@ -185,6 +189,7 @@ class SplitByWhitespaceImpl
/// Name constant returned by getName().
static constexpr auto name = "splitByWhitespace";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly one argument is expected.
static size_t getNumberOfArguments()
{
    return 1;
}

/// Check the type of the function's arguments.
Expand Down Expand Up @@ -239,21 +244,37 @@ class SplitByCharImpl
/// Upper bound of the search: get() scans [pos, end) with memchr and uses it as the end of the last token.
Pos end;

/// Single-byte separator; init() takes it from the first byte of the separator string.
char sep;
/// Optional cap on the number of splits, read by init() from a constant third argument.
/// Empty when the function is called with two arguments; once curr_split reaches it,
/// get() returns the rest of the string as one final token.
std::optional<UInt64> max_split;
/// Number of separators consumed so far; incremented by get() each time a separator is found.
/// NOTE(review): no reset of this counter is visible in this chunk - confirm it is cleared
/// whenever a new input string is set, otherwise the limit would leak across rows.
UInt64 curr_split = 0;

public:
/// Name constant returned by getName().
static constexpr auto name = "splitByChar";
static String getName() { return name; }

/// splitByChar is variadic: checkArguments() accepts either 2 or 3 arguments.
static bool isVariadic() { return true; }

/// Returns 0 because the function is variadic; the accepted argument count
/// (2 or 3) is enforced in checkArguments(). The stale fixed-arity definition
/// returning 2 was removed: it redefined this method and contradicted isVariadic().
static size_t getNumberOfArguments() { return 0; }

/// Checks the number and types of the function's arguments.
///
/// Accepts two or three arguments: the first two must be String, the optional
/// third must be a native integer.
///
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when the argument count is not 2 or 3,
/// and ILLEGAL_TYPE_OF_ARGUMENT when any argument has the wrong type.
static void checkArguments(const DataTypes & arguments)
{
    /// The format string has two placeholders (function name, argument count),
    /// so both values must be supplied.
    if (arguments.size() < 2 || arguments.size() > 3)
        throw Exception(
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Function '{}' needs at least 2 arguments, at most 3 arguments; passed {}.",
            getName(),
            arguments.size());

    if (!isString(arguments[0]))
        throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    if (!isString(arguments[1]))
        throw Exception("Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + ". Must be String.",
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

    /// The third argument is optional; when present it must be a native integer.
    if (arguments.size() == 3 && !isNativeInteger(arguments[2]))
        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Third argument for function '{}' must be integer, got '{}' instead",
            getName(),
            arguments[2]->getName());
}

void init(const ColumnsWithTypeAndName & arguments)
Expand All @@ -271,6 +292,39 @@ class SplitByCharImpl
throw Exception("Illegal separator for function " + getName() + ". Must be exactly one byte.", ErrorCodes::BAD_ARGUMENTS);

sep = sep_str[0];

if (arguments.size() > 2)
{
if (!((max_split = getMaxSplit<UInt8>(arguments[2]))
|| (max_split = getMaxSplit<Int8>(arguments[2]))
|| (max_split = getMaxSplit<UInt16>(arguments[2]))
|| (max_split = getMaxSplit<Int16>(arguments[2]))
|| (max_split = getMaxSplit<UInt32>(arguments[2]))
|| (max_split = getMaxSplit<Int32>(arguments[2]))
|| (max_split = getMaxSplit<UInt64>(arguments[2]))
|| (max_split = getMaxSplit<Int64>(arguments[2]))))
{
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of third argument of function {}",
arguments[2].column->getName(),
getName());
}
}
}

/// Tries to read the third argument as a constant column of DataType.
///
/// Returns std::nullopt when the column is not a constant ColumnVector<DataType>,
/// so the caller can probe the next integer type. Returns the constant's value
/// otherwise, and throws ILLEGAL_COLUMN when that value is negative.
template <typename DataType>
std::optional<UInt64> getMaxSplit(const ColumnWithTypeAndName & argument)
{
    if (const auto * const_column = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get()))
    {
        const auto limit = const_column->template getValue<DataType>();

        /// A negative split limit is rejected.
        if (limit < 0)
            throw Exception(
                ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of third argument of function {}", argument.column->getName(), getName());

        return limit;
    }

    return std::nullopt;
}

/// Returns the position of the argument, that is the column of strings
Expand All @@ -291,12 +345,19 @@ class SplitByCharImpl
return false;

token_begin = pos;
pos = reinterpret_cast<Pos>(memchr(pos, sep, end - pos));
if (unlikely(max_split && curr_split >= *max_split))
{
token_end = end;
pos = nullptr;
return true;
}

pos = reinterpret_cast<Pos>(memchr(pos, sep, end - pos));
if (pos)
{
token_end = pos;
++pos;
++curr_split;
}
else
token_end = end;
Expand All @@ -317,6 +378,7 @@ class SplitByStringImpl
public:
/// Name constant returned by getName().
static constexpr auto name = "splitByString";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly two arguments are expected.
static size_t getNumberOfArguments()
{
    return 2;
}

static void checkArguments(const DataTypes & arguments)
Expand Down Expand Up @@ -394,6 +456,8 @@ class SplitByRegexpImpl
public:
/// Name constant returned by getName().
static constexpr auto name = "splitByRegexp";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly two arguments are expected.
static size_t getNumberOfArguments()
{
    return 2;
}

/// Check the type of function arguments.
Expand Down Expand Up @@ -477,6 +541,7 @@ class ExtractAllImpl
public:
/// Name constant returned by getName().
static constexpr auto name = "extractAll";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly two arguments are expected.
static size_t getNumberOfArguments()
{
    return 2;
}

/// Check the type of function arguments.
Expand Down Expand Up @@ -556,6 +621,8 @@ class FunctionTokens : public IFunction

/// Always reports true, regardless of the argument types passed in.
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
{
    return true;
}

/// Forwards to the Generator's static isVariadic().
bool isVariadic() const override
{
    return Generator::isVariadic();
}

/// Forwards to the Generator's static getNumberOfArguments().
size_t getNumberOfArguments() const override
{
    return Generator::getNumberOfArguments();
}

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
Expand Down
1 change: 1 addition & 0 deletions src/Functions/URL/URLHierarchy.cpp
Expand Up @@ -20,6 +20,7 @@ class URLPathHierarchyImpl
/// Name constant returned by getName().
static constexpr auto name = "URLPathHierarchy";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly one argument is expected.
static size_t getNumberOfArguments()
{
    return 1;
}

static void checkArguments(const DataTypes & arguments)
Expand Down
1 change: 1 addition & 0 deletions src/Functions/URL/URLPathHierarchy.cpp
Expand Up @@ -19,6 +19,7 @@ class URLHierarchyImpl
/// Name constant returned by getName().
static constexpr auto name = "URLHierarchy";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly one argument is expected.
static size_t getNumberOfArguments()
{
    return 1;
}

static void checkArguments(const DataTypes & arguments)
Expand Down
1 change: 1 addition & 0 deletions src/Functions/URL/extractURLParameterNames.cpp
Expand Up @@ -19,6 +19,7 @@ class ExtractURLParameterNamesImpl
/// Name constant returned by getName().
static constexpr auto name = "extractURLParameterNames";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly one argument is expected.
static size_t getNumberOfArguments()
{
    return 1;
}

static void checkArguments(const DataTypes & arguments)
Expand Down
1 change: 1 addition & 0 deletions src/Functions/URL/extractURLParameters.cpp
Expand Up @@ -19,6 +19,7 @@ class ExtractURLParametersImpl
/// Name constant returned by getName().
static constexpr auto name = "extractURLParameters";

/// Returns the function name as a String.
static String getName()
{
    return name;
}

/// The argument count is fixed (see getNumberOfArguments()).
static bool isVariadic()
{
    return false;
}

/// Exactly one argument is expected.
static size_t getNumberOfArguments()
{
    return 1;
}

static void checkArguments(const DataTypes & arguments)
Expand Down
5 changes: 5 additions & 0 deletions tests/queries/0_stateless/02185_split_by_char.reference
@@ -0,0 +1,5 @@
['1','2','3']
['1,2,3']
['1','2,3']
['1','2','3']
['1','2','3']
8 changes: 8 additions & 0 deletions tests/queries/0_stateless/02185_split_by_char.sql
@@ -0,0 +1,8 @@
-- No limit: the string is split on every separator.
select splitByChar(',', '1,2,3');
-- Limit 0: no split is performed, the whole string comes back as one element.
select splitByChar(',', '1,2,3', 0);
-- Limit 1: one split, the remainder stays in the last element.
select splitByChar(',', '1,2,3', 1);
-- Limits at or above the number of separators behave like the unlimited call.
select splitByChar(',', '1,2,3', 2);
select splitByChar(',', '1,2,3', 3);

-- A negative limit is rejected (presumably 44 is ILLEGAL_COLUMN - confirm against ErrorCodes).
select splitByChar(',', '1,2,3', -2); -- { serverError 44 }
-- A non-integer limit is rejected (presumably 43 is ILLEGAL_TYPE_OF_ARGUMENT - confirm against ErrorCodes).
select splitByChar(',', '1,2,3', ''); -- { serverError 43 }