Skip to content

Commit e9aac09

Browse files
committed
MDEV-25440: Indexed CHAR columns are broken with NO_PAD collations
cmp_data(): Compare different-length CHAR fields with the new strnncollsp_nchars() function, which pads with spaces if needed. Any InnoDB ROW_FORMAT except the original one (which was named ROW_FORMAT=REDUNDANT in MySQL 5.0.3) will internally store CHAR(n) columns as variable-length if the character encoding is variable-length. Spaces may be trimmed from the end. For NOT NULL values, the minimum length is always n*mbminlen. In cmp_data() we only know the lengths in bytes and we cannot easily know the ROW_FORMAT. is_strnncoll_compatible(): Refactored from innobase_mysql_cmp(). innobase_mysql_cmp(): Merged into cmp_whole_field(). cmp_whole_field(): Invoke strnncollsp_nchars() for DATA_MYSQL (the CHAR type with any collation other than latin1_swedish_ci). Reviewed by: Alexander Barkov. Tested by: Roel Van de Paar.
1 parent 37144af commit e9aac09

File tree

3 files changed

+80
-103
lines changed

3 files changed

+80
-103
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
2+
ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
3+
INSERT INTO t1 VALUES ('',1);
4+
ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
5+
INSERT INTO t1 VALUES ('',2);
6+
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
7+
DROP TABLE t1;

mysql-test/suite/innodb/t/no_pad.test

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
--source include/have_innodb.inc
2+
3+
CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
4+
ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
5+
6+
INSERT INTO t1 VALUES ('',1);
7+
ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
8+
INSERT INTO t1 VALUES ('',2);
9+
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
10+
DROP TABLE t1;

storage/innobase/rem/rem0cmp.cc

Lines changed: 63 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*****************************************************************************
22
33
Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved.
4-
Copyright (c) 2020, 2021, MariaDB Corporation.
4+
Copyright (c) 2020, 2022, MariaDB Corporation.
55
66
This program is free software; you can redistribute it and/or modify it under
77
the terms of the GNU General Public License as published by the Free Software
@@ -49,49 +49,25 @@ At the present, the comparison functions return 0 in the case,
4949
where two records disagree only in the way that one
5050
has more fields than the other. */
5151

52-
/** Compare two data fields.
53-
@param[in] prtype precise type
54-
@param[in] a data field
55-
@param[in] a_length length of a, in bytes (not UNIV_SQL_NULL)
56-
@param[in] b data field
57-
@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
58-
@return positive, 0, negative, if a is greater, equal, less than b,
59-
respectively */
60-
UNIV_INLINE
61-
int
62-
innobase_mysql_cmp(
63-
ulint prtype,
64-
const byte* a,
65-
unsigned int a_length,
66-
const byte* b,
67-
unsigned int b_length)
52+
#ifndef DBUG_OFF
53+
/** @return whether a data type is compatible with strnncoll() functions */
54+
static bool is_strnncoll_compatible(ulint type)
6855
{
69-
#ifdef UNIV_DEBUG
70-
switch (prtype & DATA_MYSQL_TYPE_MASK) {
71-
case MYSQL_TYPE_BIT:
72-
case MYSQL_TYPE_STRING:
73-
case MYSQL_TYPE_VAR_STRING:
74-
case MYSQL_TYPE_TINY_BLOB:
75-
case MYSQL_TYPE_MEDIUM_BLOB:
76-
case MYSQL_TYPE_BLOB:
77-
case MYSQL_TYPE_LONG_BLOB:
78-
case MYSQL_TYPE_VARCHAR:
79-
break;
80-
default:
81-
ut_error;
82-
}
83-
#endif /* UNIV_DEBUG */
84-
85-
uint cs_num = (uint) dtype_get_charset_coll(prtype);
86-
87-
if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
88-
return(cs->coll->strnncollsp(
89-
cs, a, a_length, b, b_length));
90-
}
91-
92-
ib::fatal() << "Unable to find charset-collation " << cs_num;
93-
return(0);
56+
switch (type) {
57+
case MYSQL_TYPE_BIT:
58+
case MYSQL_TYPE_STRING:
59+
case MYSQL_TYPE_VAR_STRING:
60+
case MYSQL_TYPE_TINY_BLOB:
61+
case MYSQL_TYPE_MEDIUM_BLOB:
62+
case MYSQL_TYPE_BLOB:
63+
case MYSQL_TYPE_LONG_BLOB:
64+
case MYSQL_TYPE_VARCHAR:
65+
return true;
66+
default:
67+
return false;
68+
}
9469
}
70+
#endif /* !DBUG_OFF */
9571

9672
/*************************************************************//**
9773
Returns TRUE if two columns are equal for comparison purposes.
@@ -309,68 +285,52 @@ cmp_gis_field(
309285
@param[in] b_length length of b, in bytes (not UNIV_SQL_NULL)
310286
@return positive, 0, negative, if a is greater, equal, less than b,
311287
respectively */
312-
static
313-
int
314-
cmp_whole_field(
315-
ulint mtype,
316-
ulint prtype,
317-
const byte* a,
318-
unsigned int a_length,
319-
const byte* b,
320-
unsigned int b_length)
288+
static int cmp_whole_field(ulint mtype, ulint prtype,
289+
const byte *a, unsigned a_length,
290+
const byte *b, unsigned b_length)
321291
{
322-
float f_1;
323-
float f_2;
324-
double d_1;
325-
double d_2;
326-
327-
switch (mtype) {
328-
case DATA_DECIMAL:
329-
return(cmp_decimal(a, a_length, b, b_length));
330-
case DATA_DOUBLE:
331-
d_1 = mach_double_read(a);
332-
d_2 = mach_double_read(b);
333-
334-
if (d_1 > d_2) {
335-
return(1);
336-
} else if (d_2 > d_1) {
337-
return(-1);
338-
}
339-
340-
return(0);
341-
342-
case DATA_FLOAT:
343-
f_1 = mach_float_read(a);
344-
f_2 = mach_float_read(b);
345-
346-
if (f_1 > f_2) {
347-
return(1);
348-
} else if (f_2 > f_1) {
349-
return(-1);
350-
}
351-
352-
return(0);
353-
case DATA_VARCHAR:
354-
case DATA_CHAR:
355-
return(my_charset_latin1.coll->strnncollsp(
356-
&my_charset_latin1,
357-
a, a_length, b, b_length));
358-
case DATA_BLOB:
359-
if (prtype & DATA_BINARY_TYPE) {
360-
ib::error() << "Comparing a binary BLOB"
361-
" using a character set collation!";
362-
ut_ad(0);
363-
}
364-
/* fall through */
365-
case DATA_VARMYSQL:
366-
case DATA_MYSQL:
367-
return(innobase_mysql_cmp(prtype,
368-
a, a_length, b, b_length));
369-
default:
370-
ib::fatal() << "Unknown data type number " << mtype;
371-
}
372-
373-
return(0);
292+
switch (mtype) {
293+
default:
294+
ib::fatal() << "Unknown data type number " << mtype;
295+
return 0;
296+
case DATA_DECIMAL:
297+
return cmp_decimal(a, a_length, b, b_length);
298+
case DATA_DOUBLE:
299+
{
300+
const double af= mach_double_read(a), bf= mach_double_read(b);
301+
return af > bf ? 1 : bf > af ? -1 : 0;
302+
}
303+
case DATA_FLOAT:
304+
{
305+
const float af= mach_float_read(a), bf= mach_float_read(b);
306+
return af > bf ? 1 : bf > af ? -1 : 0;
307+
}
308+
case DATA_VARCHAR:
309+
case DATA_CHAR:
310+
/* latin1_swedish_ci is treated as a special case in InnoDB.
311+
Because it is a fixed-length encoding (mbminlen=mbmaxlen=1),
312+
non-NULL CHAR(n) values will always occupy n bytes and we
313+
can invoke strnncollsp() instead of strnncollsp_nchars(). */
314+
return my_charset_latin1.coll->strnncollsp(&my_charset_latin1,
315+
a, a_length, b, b_length);
316+
case DATA_BLOB:
317+
ut_ad(!(prtype & DATA_BINARY_TYPE)); /* our only caller tested this */
318+
/* fall through */
319+
case DATA_VARMYSQL:
320+
DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
321+
if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
322+
MYF(MY_WME)))
323+
return cs->coll->strnncollsp(cs, a, a_length, b, b_length);
324+
break;
325+
case DATA_MYSQL:
326+
DBUG_ASSERT(is_strnncoll_compatible(prtype & DATA_MYSQL_TYPE_MASK));
327+
if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
328+
MYF(MY_WME)))
329+
return cs->coll->strnncollsp_nchars(cs, a, a_length, b, b_length,
330+
std::max(a_length, b_length));
331+
}
332+
333+
ib::fatal() << "Unable to find charset-collation for " << prtype;
374334
}
375335

376336
/** Compare two data fields.

0 commit comments

Comments
 (0)