From b2dc5ada6e1702332d15fbd515c728e5d06cb7d2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 17 Nov 2023 11:31:52 +0000 Subject: [PATCH] Fix tryDecodeBase64() with invalid input --- src/Functions/FunctionBase64Conversion.h | 10 ++++----- .../00732_base64_functions.reference | 6 ++--- .../0_stateless/00732_base64_functions.sql | 22 +++++++++++++------ 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index f52dec0eaf76..de922747ccdd 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -76,12 +76,10 @@ struct TryBase64Decode static size_t perform(const std::span src, UInt8 * dst) { size_t outlen = 0; - base64_decode(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst), &outlen, 0); + int rc = base64_decode(reinterpret_cast(src.data()), src.size(), reinterpret_cast(dst), &outlen, 0); - // during decoding character array can be partially polluted - // if fail, revert back and clean - if (!outlen) - *dst = 0; + if (rc != 1) + outlen = 0; return outlen; } @@ -147,7 +145,7 @@ class FunctionBase64Conversion : public IFunction for (size_t row = 0; row < src_row_count; ++row) { const size_t src_length = src_offsets[row] - src_offset_prev - 1; - const auto outlen = Func::perform({src, src_length}, dst_pos); + const size_t outlen = Func::perform({src, src_length}, dst_pos); /// Base64 library is using AVX-512 with some shuffle operations. /// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. diff --git a/tests/queries/0_stateless/00732_base64_functions.reference b/tests/queries/0_stateless/00732_base64_functions.reference index f97c19427e7b..8f91ffa74aba 100644 --- a/tests/queries/0_stateless/00732_base64_functions.reference +++ b/tests/queries/0_stateless/00732_base64_functions.reference @@ -21,9 +21,9 @@ fooba foobar 1 1 1 1 -fooba -~Š + + + Zm9v foo foo -TEcgT3B0aW11cw== diff --git a/tests/queries/0_stateless/00732_base64_functions.sql b/tests/queries/0_stateless/00732_base64_functions.sql index 99268004003d..3c60bf939fe3 100644 --- a/tests/queries/0_stateless/00732_base64_functions.sql +++ b/tests/queries/0_stateless/00732_base64_functions.sql @@ -2,17 +2,23 @@ SET send_logs_level = 'fatal'; -SELECT base64Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); +SELECT base64Encode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64Decode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryBase64Decode(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64Encode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT base64Decode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT tryBase64Decode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +-- test with valid inputs +SELECT base64Encode(val) FROM (select arrayJoin(['', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar']) val); SELECT base64Decode(val) FROM (select arrayJoin(['', 'Zg==', 'Zm8=', 'Zm9v', 'Zm9vYg==', 'Zm9vYmE=', 'Zm9vYmFy']) val); SELECT tryBase64Decode(val) FROM (select arrayJoin(['', 'Zg==', 'Zm8=', 'Zm9v', 'Zm9vYg==', 'Zm9vYmE=', 'Zm9vYmFy']) val); SELECT base64Decode(base64Encode('foo')) = 'foo', base64Encode(base64Decode('Zm9v')) == 'Zm9v'; SELECT tryBase64Decode(base64Encode('foo')) = 'foo', base64Encode(tryBase64Decode('Zm9v')) == 'Zm9v'; -SELECT base64Encode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT base64Decode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT tryBase64Decode('foo', 'excess argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- test with invalid inputs SELECT base64Decode('Zm9vYmF=Zm9v'); -- { serverError INCORRECT_DATA } SELECT tryBase64Decode('Zm9vYmF=Zm9v'); @@ -20,9 +26,11 @@ SELECT tryBase64Decode('Zm9vYmF=Zm9v'); SELECT base64Decode('foo'); -- { serverError INCORRECT_DATA } SELECT tryBase64Decode('foo'); +SELECT base64Decode('aoeo054640eu='); -- { serverError INCORRECT_DATA } +SELECT tryBase64Decode('aoeo054640eu='); + +-- test FixedString arguments + select base64Encode(toFixedString('foo', 3)); select base64Decode(toFixedString('Zm9v', 4)); select tryBase64Decode(toFixedString('Zm9v', 4)); - --- This query reproduces a bug in TurboBase64 library (which we no longer use) -select distinct base64Encode(materialize('LG Optimus')) from numbers(100);