Skip to content

Commit

Permalink
MDEV-30879 Add support for up to BASE 62 to CONV()
Browse files Browse the repository at this point in the history
BASE 62 uses 0-9, A-Z and then a-z to give the numbers 0-61. This patch
increases the range of the string functions to cover this.

Based on ideas and tests in PR #2589, but re-written into the charset
functions.

Includes fix by Sergei, UBSAN complained:
ctype-simple.c:683:38: runtime error: negation of -9223372036854775808
cannot be represented in type 'long long int'; cast to an unsigned
type to negate this value to itself

Co-authored-by: Weijun Huang <huangweijun1001@gmail.com>
Co-authored-by: Sergei Golubchik <serg@mariadb.org>
  • Loading branch information
3 people committed Jan 17, 2024
1 parent be6d48f commit f552feb
Show file tree
Hide file tree
Showing 10 changed files with 160 additions and 22 deletions.
1 change: 1 addition & 0 deletions include/m_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ extern "C" {
#endif

/* Defined in strings/int2str.c */
extern const char _dig_vec_base62[];
extern const char _dig_vec_upper[];
extern const char _dig_vec_lower[];

Expand Down
64 changes: 62 additions & 2 deletions mysql-test/main/func_str.result
Original file line number Diff line number Diff line change
Expand Up @@ -1078,8 +1078,8 @@ lpad(12345, 5, "#")
SELECT conv(71, 10, 36), conv('1Z', 36, 10);
conv(71, 10, 36) conv('1Z', 36, 10)
1Z 71
SELECT conv(71, 10, 37), conv('1Z', 37, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);
conv(71, 10, 37) conv('1Z', 37, 10) conv(0,1,10) conv(0,0,10) conv(0,-1,10)
SELECT conv(71, 10, 63), conv('1Z', 63, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);
conv(71, 10, 63) conv('1Z', 63, 10) conv(0,1,10) conv(0,0,10) conv(0,-1,10)
NULL NULL NULL NULL NULL
create table t1 (id int(1), str varchar(10)) DEFAULT CHARSET=utf8;
insert into t1 values (1,'aaaaaaaaaa'), (2,'bbbbbbbbbb');
Expand Down Expand Up @@ -5535,3 +5535,63 @@ aes_encrypt(a,a) is null
#
# End of 11.2 tests
#
#
# MDEV-30879 Add conversion to based 62 for CONV function
#
SELECT CONV('1z', 62, 10);
CONV('1z', 62, 10)
123
SELECT CONV('1Z', 62, 10);
CONV('1Z', 62, 10)
97
SELECT CONV('-1Z', 62, 10);
CONV('-1Z', 62, 10)
18446744073709551519
SELECT CONV('-1Z', -62, 10);
CONV('-1Z', -62, 10)
18446744073709551519
SELECT CONV('-1Z', 62, -10);
CONV('-1Z', 62, -10)
-97
SELECT CONV('-1Z', -62, -10);
CONV('-1Z', -62, -10)
-97
SELECT CONV('AzL8n0Y58m7', 62, 10);
CONV('AzL8n0Y58m7', 62, 10)
9223372036854775807
SELECT CONV('LygHa16AHYE', 62, 10);
CONV('LygHa16AHYE', 62, 10)
18446744073709551614
SELECT CONV('LygHa16AHYF', 62, 10);
CONV('LygHa16AHYF', 62, 10)
18446744073709551615
SELECT CONV('LygHa16AHZ0', 62, 10);
CONV('LygHa16AHZ0', 62, 10)
18446744073709551615
SELECT CONV('-AzL8n0Y58m7', -62, -10);
CONV('-AzL8n0Y58m7', -62, -10)
-9223372036854775807
SELECT CONV('-AzL8n0Y58m8', -62, -10);
CONV('-AzL8n0Y58m8', -62, -10)
-9223372036854775808
SELECT CONV('-AzL8n0Y58m9', -62, -10);
CONV('-AzL8n0Y58m9', -62, -10)
-9223372036854775808
SELECT CONV('-LygHa16AHZ0', -62, -10);
CONV('-LygHa16AHZ0', -62, -10)
-9223372036854775808
SELECT CONV('LygHa16AHYF', 63, 10);
CONV('LygHa16AHYF', 63, 10)
NULL
SELECT CONV(18446744073709551615, 10, 63);
CONV(18446744073709551615, 10, 63)
NULL
SELECT CONV(18446744073709551615, 10, 62);
CONV(18446744073709551615, 10, 62)
LygHa16AHYF
SELECT CONV(-9223372036854775808, -10, -62);
CONV(-9223372036854775808, -10, -62)
-AzL8n0Y58m8
#
# End of 11.4 tests
#
39 changes: 38 additions & 1 deletion mysql-test/main/func_str.test
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ SELECT lpad(12345, 5, "#");
#

SELECT conv(71, 10, 36), conv('1Z', 36, 10);
SELECT conv(71, 10, 37), conv('1Z', 37, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);
SELECT conv(71, 10, 63), conv('1Z', 63, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);

#
# Bug in SUBSTRING when mixed with CONCAT and ORDER BY (Bug #3089)
Expand Down Expand Up @@ -2481,3 +2481,40 @@ select aes_encrypt(a,a) is null from (values('a'),(NULL),('b')) x;
--echo #
--echo # End of 11.2 tests
--echo #

--echo #
--echo # MDEV-30879 Add conversion to based 62 for CONV function
--echo #

SELECT CONV('1z', 62, 10);
SELECT CONV('1Z', 62, 10);

SELECT CONV('-1Z', 62, 10);
SELECT CONV('-1Z', -62, 10);
SELECT CONV('-1Z', 62, -10);
SELECT CONV('-1Z', -62, -10);

# Check limits
SELECT CONV('AzL8n0Y58m7', 62, 10);
SELECT CONV('LygHa16AHYE', 62, 10);
SELECT CONV('LygHa16AHYF', 62, 10);

# Overflow does not appear to raise a warning; the result is clamped to the maximum value
SELECT CONV('LygHa16AHZ0', 62, 10);

SELECT CONV('-AzL8n0Y58m7', -62, -10);
SELECT CONV('-AzL8n0Y58m8', -62, -10);
SELECT CONV('-AzL8n0Y58m9', -62, -10);
SELECT CONV('-LygHa16AHZ0', -62, -10);

# Should return NULL
SELECT CONV('LygHa16AHYF', 63, 10);
SELECT CONV(18446744073709551615, 10, 63);

# Test 10 -> 62
SELECT CONV(18446744073709551615, 10, 62);
SELECT CONV(-9223372036854775808, -10, -62);

--echo #
--echo # End of 11.4 tests
--echo #
4 changes: 2 additions & 2 deletions sql/item_strfunc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3936,8 +3936,8 @@ String *Item_func_conv::val_str(String *str)
// Note that abs(INT_MIN) is undefined.
if (args[0]->null_value || args[1]->null_value || args[2]->null_value ||
from_base == INT_MIN || to_base == INT_MIN ||
abs(to_base) > 36 || abs(to_base) < 2 ||
abs(from_base) > 36 || abs(from_base) < 2 || !(res->length()))
abs(to_base) > 62 || abs(to_base) < 2 ||
abs(from_base) > 62 || abs(from_base) < 2 || !(res->length()))
{
null_value= 1;
return NULL;
Expand Down
22 changes: 21 additions & 1 deletion strings/ctype-simple.c
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,11 @@ long my_strntol_8bit(CHARSET_INFO *cs,
else if (c>='A' && c<='Z')
c = c - 'A' + 10;
else if (c>='a' && c<='z')
{
c = c - 'a' + 10;
if (base > 36)
c += 26;
}
else
break;
if (c >= base)
Expand Down Expand Up @@ -546,7 +550,11 @@ ulong my_strntoul_8bit(CHARSET_INFO *cs,
else if (c>='A' && c<='Z')
c = c - 'A' + 10;
else if (c>='a' && c<='z')
{
c = c - 'a' + 10;
if (base > 36)
c += 26;
}
else
break;
if (c >= base)
Expand Down Expand Up @@ -634,7 +642,11 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
else if (c>='A' && c<='Z')
c = c - 'A' + 10;
else if (c>='a' && c<='z')
{
c = c - 'a' + 10;
if (base > 36)
c += 26;
}
else
break;
if (c >= base)
Expand All @@ -656,8 +668,12 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),

if (negative)
{
if (i > (ulonglong) LONGLONG_MIN)
if (i >= (ulonglong) LONGLONG_MIN)
{
if (i == (ulonglong) LONGLONG_MIN)
return LONGLONG_MIN;
overflow = 1;
}
}
else if (i > (ulonglong) LONGLONG_MAX)
overflow = 1;
Expand Down Expand Up @@ -731,7 +747,11 @@ ulonglong my_strntoull_8bit(CHARSET_INFO *cs,
else if (c>='A' && c<='Z')
c = c - 'A' + 10;
else if (c>='a' && c<='z')
{
c = c - 'a' + 10;
if (base > 36)
c += 26;
}
else
break;
if (c >= base)
Expand Down
8 changes: 8 additions & 0 deletions strings/ctype-ucs2.c
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,11 @@ my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
else if ( wc>='A' && wc<='Z')
wc = wc - 'A' + 10;
else if ( wc>='a' && wc<='z')
{
wc = wc - 'a' + 10;
if (base > 36)
wc += 26;
}
else
break;
if ((int)wc >= base)
Expand Down Expand Up @@ -575,7 +579,11 @@ my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
else if ( wc>='A' && wc<='Z')
wc = wc - 'A' + 10;
else if ( wc>='a' && wc<='z')
{
wc = wc - 'a' + 10;
if (base > 36)
wc += 26;
}
else
break;
if ((int)wc >= base)
Expand Down
15 changes: 11 additions & 4 deletions strings/int2str.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
/*
The _dig_vec arrays are public because they are used in several places outside this file.
*/
/*
  Digit characters for the case-sensitive alphabet: index 0..9 maps to
  '0'-'9', 10..35 to 'A'-'Z' and 36..61 to 'a'-'z'.
*/
const char _dig_vec_base62[] =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
/*
  Digit characters using upper-case letters: index 0..9 maps to '0'-'9'
  and 10..35 to 'A'-'Z'.
*/
const char _dig_vec_upper[] =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const char _dig_vec_lower[] =
Expand All @@ -50,7 +52,7 @@ const char _dig_vec_lower[] =
DESCRIPTION
Converts the (long) integer value to its character form and moves it to
the destination buffer followed by a terminating NUL.
If radix is -2..-36, val is taken to be SIGNED, if radix is 2..36, val is
If radix is -2..-62, val is taken to be SIGNED, if radix is 2..62, val is
taken to be UNSIGNED. That is, val is signed if and only if radix is.
All other radixes are treated as bad, and nothing will be changed in this case.
Expand All @@ -68,12 +70,17 @@ int2str(register long int val, register char *dst, register int radix,
char buffer[65];
register char *p;
long int new_val;
const char *dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;
const char *dig_vec;
ulong uval= (ulong) val;

if (radix < -36 || radix > 36)
dig_vec= _dig_vec_base62;
else
dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;

if (radix < 0)
{
if (radix < -36 || radix > -2)
if (radix < -62 || radix > -2)
return NullS;
if (val < 0)
{
Expand All @@ -83,7 +90,7 @@ int2str(register long int val, register char *dst, register int radix,
}
radix = -radix;
}
else if (radix > 36 || radix < 2)
else if (radix > 62 || radix < 2)
return NullS;

/*
Expand Down
15 changes: 10 additions & 5 deletions strings/longlong2str.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
result is normally a pointer to this NUL character, but if the radix
is dud the result will be NullS and nothing will be changed.
If radix is -2..-36, val is taken to be SIGNED.
If radix is 2.. 36, val is taken to be UNSIGNED.
If radix is -2..-62, val is taken to be SIGNED.
If radix is 2.. 62, val is taken to be UNSIGNED.
That is, val is signed if and only if radix is. You will normally
use radix -10 only through itoa and ltoa, for radix 2, 8, or 16
unsigned is what you generally want.
Expand All @@ -63,12 +63,17 @@ char *ll2str(longlong val,char *dst,int radix, int upcase)
char buffer[65];
register char *p;
long long_val;
const char *dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;
const char *dig_vec;
ulonglong uval= (ulonglong) val;

if (radix < -36 || radix > 36)
dig_vec= _dig_vec_base62;
else
dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;

if (radix < 0)
{
if (radix < -36 || radix > -2) return (char*) 0;
if (radix < -62 || radix > -2) return (char*) 0;
if (val < 0) {
*dst++ = '-';
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
Expand All @@ -78,7 +83,7 @@ char *ll2str(longlong val,char *dst,int radix, int upcase)
}
else
{
if (radix > 36 || radix < 2) return (char*) 0;
if (radix > 62 || radix < 2) return (char*) 0;
}
if (uval == 0)
{
Expand Down
10 changes: 5 additions & 5 deletions strings/str2int.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@
#include "my_sys.h" /* defines errno */
#include <errno.h>

#define char_val(X) (X >= '0' && X <= '9' ? X-'0' :\
#define char_val(X, Y) (X >= '0' && X <= '9' ? X-'0' :\
X >= 'A' && X <= 'Z' ? X-'A'+10 :\
X >= 'a' && X <= 'z' ? X-'a'+10 :\
X >= 'a' && X <= 'z' ? (Y <= 36 ? X-'a'+10 : X-'a'+36) :\
'\177')

char *str2int(register const char *src, register int radix, long int lower,
Expand All @@ -76,10 +76,10 @@ char *str2int(register const char *src, register int radix, long int lower,

*val = 0;

/* Check that the radix is in the range 2..36 */
/* Check that the radix is in the range 2..62 */

#ifndef DBUG_OFF
if (radix < 2 || radix > 36) {
if (radix < 2 || radix > 62) {
errno=EDOM;
return NullS;
}
Expand Down Expand Up @@ -126,7 +126,7 @@ char *str2int(register const char *src, register int radix, long int lower,
to left in order to avoid overflow. Answer is after last digit.
*/

for (n = 0; (digits[n]=char_val(*src)) < radix && n < 20; n++,src++) ;
for (n = 0; (digits[n]=char_val(*src, radix)) < radix && n < 20; n++,src++) ;

/* Check that there is at least one digit */

Expand Down
4 changes: 2 additions & 2 deletions strings/string.doc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ Speciella anv
the destination string "dst" followed by a terminating NUL. The
result is normally a pointer to this NUL character, but if the radix
is dud the result will be NullS and nothing will be changed.
If radix is -2..-36, val is taken to be SIGNED.
If radix is 2.. 36, val is taken to be UNSIGNED.
If radix is -2..-62, val is taken to be SIGNED.
If radix is 2.. 62, val is taken to be UNSIGNED.
That is, val is signed if and only if radix is. You will normally
use radix -10 only through itoa and ltoa, for radix 2, 8, or 16
unsigned is what you generally want.
Expand Down

0 comments on commit f552feb

Please sign in to comment.