Skip to content

Commit

Permalink
MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
Browse files Browse the repository at this point in the history
ENUM/SET columns with equal value lists share a single TYPELIB, and
that shared TYPELIB could erroneously be hex-unescaped more than once
at table open time.

Fix:
- Prevent multiple unescaping of the same TYPELIB
- Prevent sharing TYPELIBs between columns with different mbminlen
  • Loading branch information
abarkov committed Mar 17, 2022
1 parent 118826d commit 22fd31c
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 3 deletions.
25 changes: 25 additions & 0 deletions mysql-test/r/ctype_utf32.result
Original file line number Diff line number Diff line change
Expand Up @@ -2913,5 +2913,30 @@ t1 CREATE TABLE `t1` (
DROP TABLE t1;
SET NAMES utf8;
#
# MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
#
CREATE TABLE t1 (
c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a',
c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a'
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` enum('a','b') CHARACTER SET utf32 DEFAULT 'a',
`c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a'
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
CREATE TABLE t1 (
c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin,
c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci
);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` enum('00000061','00000062') CHARACTER SET latin1 COLLATE latin1_bin DEFAULT '00000061',
`c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a'
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
#
# End of 10.2 tests
#
19 changes: 19 additions & 0 deletions mysql-test/t/ctype_utf32.test
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,25 @@ DROP TABLE t1;
SET NAMES utf8;


--echo #
--echo # MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
--echo #

CREATE TABLE t1 (
c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a',
c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a'
);
SHOW CREATE TABLE t1;
DROP TABLE t1;

CREATE TABLE t1 (
c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin,
c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci
);
SHOW CREATE TABLE t1;
DROP TABLE t1;


--echo #
--echo # End of 10.2 tests
--echo #
20 changes: 18 additions & 2 deletions sql/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
plugin_ref se_plugin= 0;
MEM_ROOT *old_root= thd->mem_root;
Virtual_column_info **table_check_constraints;
bool *interval_unescaped= NULL;
DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image");

keyinfo= &first_keyinfo;
Expand Down Expand Up @@ -1686,6 +1687,13 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,

goto err;

if (interval_count)
{
if (!(interval_unescaped= (bool*) my_alloca(interval_count * sizeof(bool))))
goto err;
bzero(interval_unescaped, interval_count * sizeof(bool));
}

field_ptr= share->field;
table_check_constraints= share->check_constraints;
read_length=(uint) (share->fields * field_pack_length +
Expand Down Expand Up @@ -1956,11 +1964,17 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
if (share->mysql_version < 100200)
pack_flag&= ~FIELDFLAG_LONG_DECIMAL;

if (interval_nr && charset->mbminlen > 1)
if (interval_nr && charset->mbminlen > 1 &&
!interval_unescaped[interval_nr - 1])
{
/* Unescape UCS2 intervals from HEX notation */
/*
Unescape UCS2/UTF16/UTF32 intervals from HEX notation.
Note: ENUM/SET columns with equal value lists share a single
copy of a TYPELIB, so each TYPELIB must be unescaped only once.
*/
TYPELIB *interval= share->intervals + interval_nr - 1;
unhex_type2(interval);
interval_unescaped[interval_nr - 1]= true;
}

#ifndef TO_BE_DELETED_ON_PRODUCTION
Expand Down Expand Up @@ -2610,6 +2624,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
share->error= OPEN_FRM_OK;
thd->status_var.opened_shares++;
thd->mem_root= old_root;
my_afree(interval_unescaped);
DBUG_RETURN(0);

err:
Expand All @@ -2623,6 +2638,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
open_table_error(share, OPEN_FRM_CORRUPTED, share->open_errno);

thd->mem_root= old_root;
my_afree(interval_unescaped);
DBUG_RETURN(HA_ERR_NOT_A_TABLE);
}

Expand Down
11 changes: 10 additions & 1 deletion sql/unireg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,16 @@ static uint get_interval_id(uint *int_count,List<Create_field> &create_fields,

while ((field=it++) != last_field)
{
if (field->interval_id && field->interval->count == interval->count)
/*
ENUM/SET columns with equal value lists share a single
copy of the underlying TYPELIB.
Fields with different mbminlen can't reuse TYPELIBs, because:
- TYPELIBs of fields with mbminlen==1 are written to FRM as is
- TYPELIBs of fields with mbminlen>1 are written to FRM in hex-encoded format
*/
if (field->interval_id &&
field->interval->count == interval->count &&
field->charset->mbminlen == last_field->charset->mbminlen)
{
const char **a,**b;
for (a=field->interval->type_names, b=interval->type_names ;
Expand Down

0 comments on commit 22fd31c

Please sign in to comment.