Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sqidDecode() function #58544

Merged
merged 6 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion contrib/sqids-cpp
51 changes: 42 additions & 9 deletions docs/en/sql-reference/functions/hash-functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -1777,34 +1777,67 @@ Result:
└────────────────────────────────────────────────────────────────────────┘
```

## sqid
## sqidEncode

Transforms numbers into a [Sqid](https://sqids.org/) which is a YouTube-like ID string.
Encodes numbers as a [Sqid](https://sqids.org/) which is a YouTube-like ID string.
The output alphabet is `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`.
Do not use this function for hashing - the generated IDs can be decoded back into numbers.
Do not use this function for hashing - the generated IDs can be decoded back into the original numbers.

**Syntax**

```sql
sqid(number1, ...)
sqidEncode(number1, ...)
```

Alias: `sqid`

**Arguments**

- A variable number of UInt8, UInt16, UInt32 or UInt64 numbers.

**Returned Value**

A hash id [String](/docs/en/sql-reference/data-types/string.md).
A sqid [String](/docs/en/sql-reference/data-types/string.md).

**Example**

```sql
SELECT sqidEncode(1, 2, 3, 4, 5);
```

```response
┌─sqidEncode(1, 2, 3, 4, 5)─┐
│ gXHfJ1C6dN │
└───────────────────────────┘
```

## sqidDecode

Decodes a [Sqid](https://sqids.org/) back into its original numbers.
Returns an empty array in case the input string is not a valid sqid.

**Syntax**

```sql
sqidDecode(sqid)
```

**Arguments**

- A sqid - [String](/docs/en/sql-reference/data-types/string.md)

**Returned Value**

The numbers decoded from the sqid, as an [Array(UInt64)](/docs/en/sql-reference/data-types/array.md).

**Example**

```sql
SELECT sqid(1, 2, 3, 4, 5);
SELECT sqidDecode('gXHfJ1C6dN');
```

```response
┌─sqid(1, 2, 3, 4, 5)─┐
gXHfJ1C6dN
└─────────────────────┘
┌─sqidDecode('gXHfJ1C6dN')─┐
│ [1,2,3,4,5]              │
└──────────────────────────┘
```
89 changes: 78 additions & 11 deletions src/Functions/sqid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

#if USE_SQIDS

#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>

#include <sqids/sqids.hpp>
Expand All @@ -22,17 +24,17 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}

// sqid(number1, ...)
class FunctionSqid : public IFunction
/// sqidEncode(number1, ...)
class FunctionSqidEncode : public IFunction
{
public:
static constexpr auto name = "sqid";
static constexpr auto name = "sqidEncode";

String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 0; }
bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSqid>(); }
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSqidEncode>(); }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
Expand Down Expand Up @@ -80,21 +82,86 @@ class FunctionSqid : public IFunction
sqidscxx::Sqids<> sqids;
};

/// sqidDecode(sqid)
///
/// Decodes a sqid string back into the numbers it encodes and returns them as Array(UInt64).
/// Takes exactly one String argument. According to the function's user documentation, a string
/// that is not a valid sqid decodes to an empty array.
class FunctionSqidDecode : public IFunction
{
public:
    static constexpr auto name = "sqidDecode";

    String getName() const override { return name; }
    size_t getNumberOfArguments() const override { return 1; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSqidDecode>(); }

    /// Checks that the single argument is a String; the result type is always Array(UInt64).
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        FunctionArgumentDescriptors args{
            {"sqid", &isString<IDataType>, nullptr, "String"}
        };
        validateFunctionArgumentTypes(*this, arguments, args);

        return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
    }

    /// Decodes every row of the argument column and assembles the result array column from
    /// a flat column of all decoded numbers plus one offset per row.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        auto col_res_nested = ColumnUInt64::create();
        auto & res_nested_data = col_res_nested->getData();

        auto col_res_offsets = ColumnArray::ColumnOffsets::create();
        auto & res_offsets_data = col_res_offsets->getData();
        res_offsets_data.reserve(input_rows_count);

        /// The argument column does not change between rows, so fetch it once outside the loop.
        const ColumnPtr & sqid_column = arguments[0].column;

        /// ColumnArray offsets are cumulative: the offset stored for row i is the position in the
        /// nested column where row i ends, not the length of row i. Keep a running total so that
        /// blocks with more than one row produce well-formed arrays.
        size_t current_offset = 0;

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            std::string_view sqid = sqid_column->getDataAt(i).toView();
            std::vector<UInt64> integers = sqids.decode(sqid);
            res_nested_data.insert(integers.begin(), integers.end());
            current_offset += integers.size();
            res_offsets_data.push_back(current_offset);
        }

        return ColumnArray::create(std::move(col_res_nested), std::move(col_res_offsets));
    }

private:
    /// Decoder instance with the library's default template parameters and default-constructed options.
    sqidscxx::Sqids<> sqids;
};

REGISTER_FUNCTION(Sqid)
{
factory.registerFunction<FunctionSqid>(FunctionDocumentation{
factory.registerFunction<FunctionSqidEncode>(FunctionDocumentation{
.description=R"(
Transforms numbers into a [Sqid](https://sqids.org/) which is a Youtube-like ID string.)",
.syntax="sqid(number1, ...)",
.syntax="sqidEncode(number1, ...)",
.arguments={{"number1, ...", "Arbitrarily many UInt8, UInt16, UInt32 or UInt64 arguments"}},
.returned_value="A hash id [String](/docs/en/sql-reference/data-types/string.md).",
.examples={
{"simple",
"SELECT sqid(1, 2, 3, 4, 5);",
"SELECT sqidEncode(1, 2, 3, 4, 5);",
R"(
┌─sqidEncode(1, 2, 3, 4, 5)─┐
│ gXHfJ1C6dN │
└───────────────────────────┘
)"
}}
});
factory.registerAlias("sqid", FunctionSqidEncode::name);

factory.registerFunction<FunctionSqidDecode>(FunctionDocumentation{
.description=R"(
Transforms a [Sqid](https://sqids.org/) back into an array of numbers.)",
.syntax="sqidDecode(sqid)",
.arguments={{"sqid", "A sqid"}},
.returned_value="An array of [UInt64](/docs/en/sql-reference/data-types/int-uint.md).",
.examples={
{"simple",
"SELECT sqidDecode('gXHfJ1C6dN');",
R"(
┌─sqid(1, 2, 3, 4, 5)─┐
gXHfJ1C6dN
└─────────────────────┘
┌─sqidDecode('gXHfJ1C6dN')─┐
[1,2,3,4,5]
└──────────────────────────
)"
}}
});
Expand Down
18 changes: 11 additions & 7 deletions tests/queries/0_stateless/02933_sqid.reference
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
-- negative tests
-- const UInt*
Uk
XMbT
86Rf07
Td1EnWQo
Uk [1]
XMbT [1,2]
86Rf07 [1,2,3]
Td1EnWQo [1,2,3,4]
XMbT
-- non-const UInt*
Uk
Uk [1]
XMbT [1,2]
86Rf07 [1,2,3]
Td1EnWQo [1,2,3,4]
XMbT
86Rf07
Td1EnWQo
-- invalid sqid
[]
-- alias
XMbT
32 changes: 20 additions & 12 deletions tests/queries/0_stateless/02933_sqid.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,27 @@
SET allow_suspicious_low_cardinality_types = 1;

SELECT '-- negative tests';
SELECT sqid(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT sqid('1'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT sqidEncode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT sqidDecode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT sqidEncode('1'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT sqidDecode(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

SELECT '-- const UInt*';
SELECT sqid(1);
SELECT sqid(1, 2);
SELECT sqid(1, 2, 3);
SELECT sqid(1::UInt8, 2::UInt16, 3::UInt32, 4::UInt64);
SELECT sqid(toNullable(1), toLowCardinality(2));
SELECT sqidEncode(1) AS sqid, sqidDecode(sqid);
SELECT sqidEncode(1, 2) AS sqid, sqidDecode(sqid);
SELECT sqidEncode(1, 2, 3) AS sqid, sqidDecode(sqid);
SELECT sqidEncode(1::UInt8, 2::UInt16, 3::UInt32, 4::UInt64) AS sqid, sqidDecode(sqid);
SELECT sqidEncode(toNullable(1), toLowCardinality(2)) AS sqid;

SELECT '-- non-const UInt*';
SELECT sqid(materialize(1));
SELECT sqid(materialize(1), materialize(2));
SELECT sqid(materialize(1), materialize(2), materialize(3));
SELECT sqid(materialize(1::UInt8), materialize(2::UInt16), materialize(3::UInt32), materialize(4::UInt64));
SELECT sqid(toNullable(materialize(1)), toLowCardinality(materialize(2)));
SELECT sqidEncode(materialize(1)) AS sqid, sqidDecode(sqid);
SELECT sqidEncode(materialize(1), materialize(2)) AS sqid, sqidDecode(sqid);
SELECT sqidEncode(materialize(1), materialize(2), materialize(3)) AS sqid, sqidDecode(sqid);
SELECT sqidEncode(materialize(1::UInt8), materialize(2::UInt16), materialize(3::UInt32), materialize(4::UInt64)) AS sqid, sqidDecode(sqid);
SELECT sqidEncode(toNullable(materialize(1)), toLowCardinality(materialize(2)));

SELECT '-- invalid sqid';
SELECT sqidDecode('invalid sqid');

SELECT '-- alias';
SELECT sqid(1, 2);
10 changes: 6 additions & 4 deletions utils/check-style/aspell-ignore/en/aspell-dict.txt
Original file line number Diff line number Diff line change
Expand Up @@ -701,8 +701,6 @@ PrettySpaceMonoBlock
PrettySpaceNoEscapes
PrettySpaceNoEscapesMonoBlock
Prewhere
TotalPrimaryKeyBytesInMemory
TotalPrimaryKeyBytesInMemoryAllocated
PrivateKeyPassphraseHandler
ProfileEvents
Profiler
Expand Down Expand Up @@ -924,6 +922,8 @@ Toolset
TopK
TotalBytesOfMergeTreeTables
TotalPartsOfMergeTreeTables
TotalPrimaryKeyBytesInMemory
TotalPrimaryKeyBytesInMemoryAllocated
TotalRowsOfMergeTreeTables
TotalTemporaryFiles
Tradeoff
Expand Down Expand Up @@ -1848,14 +1848,14 @@ metrica
metroHash
mfedotov
minMap
minSampleSizeContinuous
minSampleSizeConversion
mindsdb
minimalistic
mininum
miniselect
minmap
minmax
minSampleSizeContinuous
minSampleSizeConversion
mins
misconfiguration
mispredictions
Expand Down Expand Up @@ -2283,6 +2283,8 @@ splitByString
splitByWhitespace
splitby
sqid
sqidDecode
sqidEncode
sql
sqlalchemy
sqlinsert
Expand Down