Skip to content

Commit

Permalink
Added function firstLine to extract the first line from a multiline s…
Browse files Browse the repository at this point in the history
…tring

Fixes #51172
  • Loading branch information
mkmkme committed Jul 18, 2023
1 parent 562ea03 commit 4f7fd69
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 1 deletion.
33 changes: 33 additions & 0 deletions docs/en/sql-reference/functions/string-functions.md
Expand Up @@ -1267,3 +1267,36 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).

If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.

## firstLine

Returns the first line from a multi-line string.

**Syntax**

```sql
firstLine(val)
```

**Arguments**

- `val` - Input value. [String](../data-types/string.md)

**Returned value**

- The first line of the input value or the whole value if there are no line
separators. [String](../data-types/string.md)

**Example**

```sql
select firstLine('foo\nbar\nbaz');
```

Result:

```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo                        │
└────────────────────────────┘
```
36 changes: 36 additions & 0 deletions docs/ru/sql-reference/functions/string-functions.md
Expand Up @@ -1124,3 +1124,39 @@ Do Nothing for 2 Minutes 2:00  
Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.

## firstLine

Возвращает первую строку в многострочном тексте.

**Синтаксис**

```sql
firstLine(val)
```

**Аргументы**

- `val` - текст для обработки. [String](../data-types/string.md)

**Возвращаемое значение**

- Первая строка текста или весь текст, если переносы строк отсутствуют.

Тип: [String](../data-types/string.md)

**Пример**

Запрос:

```sql
select firstLine('foo\nbar\nbaz');
```

Результат:

```result
┌─firstLine('foo\nbar\nbaz')─┐
│ foo                        │
└────────────────────────────┘
```
2 changes: 1 addition & 1 deletion src/Functions/StringHelpers.h
Expand Up @@ -156,7 +156,7 @@ struct CutSubstringImpl

/// FixedString input is not handled by this implementation: the call
/// unconditionally throws ILLEGAL_COLUMN. All parameters are intentionally
/// unnamed because none of them is read.
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
    throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by this function");
}
};

Expand Down
42 changes: 42 additions & 0 deletions src/Functions/firstLine.cpp
@@ -0,0 +1,42 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/StringHelpers.h>
#include <base/find_symbols.h>

namespace DB
{

struct FirstLine
{
static size_t getReserveLengthForElement() { return 16; }

static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
{
res_data = data;

const Pos end = data + size;
const Pos pos = find_first_symbols<'\r', '\n'>(data, end);
res_size = pos - data;
}
};

/// Name tag passed to the function template: holds the SQL-visible name "firstLine".
struct NameFirstLine
{
    static constexpr const char * name = "firstLine";
};

/// The SQL function type: the FirstLine policy wrapped in ExtractSubstringImpl and
/// exposed through FunctionStringToString under the name taken from NameFirstLine.
using FunctionFirstLine = FunctionStringToString<ExtractSubstringImpl<FirstLine>, NameFirstLine>;

/// Registers `firstLine` in the function factory together with its built-in
/// documentation (description, syntax, arguments, returned value and two examples).
/// The "\\n" in the first example query is an escaped backslash, so the documented
/// query text contains the two characters `\n` rather than a real line break.
REGISTER_FUNCTION(FirstLine)
{
    factory.registerFunction<FunctionFirstLine>(FunctionDocumentation{
        .description = "Returns first line of a multi-line string.",
        .syntax = "firstLine(string)",
        .arguments = {{.name = "string", .description = "The string to process."}},
        .returned_value = {"The first line of the string or the whole string if there are no line separators."},
        .examples = {
            {.name = "Return first line", .query = "firstLine('Hello\\nWorld')", .result = "'Hello'"},
            {.name = "Return whole string", .query = "firstLine('Hello World')", .result = "'Hello World'"},
        }});
}
}
9 changes: 9 additions & 0 deletions tests/queries/0_stateless/02815_first_line.reference
@@ -0,0 +1,9 @@
foo
foo
foo
foobarbaz
== vector
1 foo
2 quux
3 single line
4 windows
12 changes: 12 additions & 0 deletions tests/queries/0_stateless/02815_first_line.sql
@@ -0,0 +1,12 @@
-- Constant arguments: one query per line-break style (\n, \r, \r\n),
-- followed by a string that contains no line break at all.
select firstLine('foo\nbar\nbaz');
select firstLine('foo\rbar\rbaz');
select firstLine('foo\r\nbar\r\nbaz');
select firstLine('foobarbaz');

-- Marker row that separates the constant cases from the table-backed cases in the output.
select '== vector';

-- Non-constant argument: the same function applied to a String column read from a table.
drop table if exists 02815_first_line_vector;
create table 02815_first_line_vector (n Int32, text String) engine = MergeTree order by n;

-- Rows cover: several lines, a trailing line break, a single line, and Windows-style \r\n.
insert into 02815_first_line_vector values (1, 'foo\nbar\nbaz'), (2, 'quux\n'), (3, 'single line'), (4, 'windows\r\nline breaks');
-- Ordered by n so the output is deterministic.
select n, firstLine(text) from 02815_first_line_vector order by n;
1 change: 1 addition & 0 deletions utils/check-style/aspell-ignore/en/aspell-dict.txt
Expand Up @@ -1428,6 +1428,7 @@ filesystemFree
filesystems
finalizeAggregation
fips
firstLine
firstSignificantSubdomain
firstSignificantSubdomainCustom
fixedstring
Expand Down

0 comments on commit 4f7fd69

Please sign in to comment.