Skip to content

Commit

Permalink
MDEV-6776 ujis and eucjpms erroneously accept 0x8EA0 as a valid byte …
Browse files Browse the repository at this point in the history
…sequence
  • Loading branch information
Alexander Barkov committed Sep 24, 2014
1 parent 9fa62b4 commit 3416fac
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 18 deletions.
17 changes: 17 additions & 0 deletions mysql-test/r/ctype_eucjpms.result
Original file line number Diff line number Diff line change
Expand Up @@ -33622,3 +33622,20 @@ hex(weight_string(cast(0x8FA2C38FA2C38FA2C3 as char),25, 4, 0xC0))
#
# End of 5.6 tests
#
#
# Start of 10.0 tests
#
#
# MDEV-6776 ujis and eucjmps erroneously accept 0x8EA0 as a valid byte sequence
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET eucjpms);
INSERT INTO t1 VALUES (0x8EA0);
SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
HEX(a) CHAR_LENGTH(a)
0
DROP TABLE t1;
SELECT _eucjpms 0x8EA0;
ERROR HY000: Invalid eucjpms character string: '8EA0'
#
# End of 10.0 tests
#
17 changes: 17 additions & 0 deletions mysql-test/r/ctype_ujis.result
Original file line number Diff line number Diff line change
Expand Up @@ -25928,3 +25928,20 @@ hex(weight_string(cast(0x8FA2C38FA2C38FA2C3 as char),25, 4, 0xC0))
#
# End of 5.6 tests
#
#
# Start of 10.0 tests
#
#
# MDEV-6776 ujis and eucjmps erroneously accept 0x8EA0 as a valid byte sequence
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ujis);
INSERT INTO t1 VALUES (0x8EA0);
SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
HEX(a) CHAR_LENGTH(a)
0
DROP TABLE t1;
SELECT _ujis 0x8EA0;
ERROR HY000: Invalid ujis character string: '8EA0'
#
# End of 10.0 tests
#
20 changes: 20 additions & 0 deletions mysql-test/t/ctype_eucjpms.test
Original file line number Diff line number Diff line change
Expand Up @@ -520,3 +520,23 @@ set collation_connection=eucjpms_bin;
--echo #
--echo # End of 5.6 tests
--echo #


--echo #
--echo # Start of 10.0 tests
--echo #

--echo #
--echo # MDEV-6776 ujis and eucjmps erroneously accept 0x8EA0 as a valid byte sequence
--echo #
# 0x8EA0 is not a well-formed eucjpms sequence: the 0x8E lead byte must be
# followed by a byte in 0xA1..0xDF (half-width katakana), so 0xA0 is invalid.
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET eucjpms);
INSERT INTO t1 VALUES (0x8EA0);
# The expected output is an empty HEX(a) and CHAR_LENGTH(a) = 0, i.e. the
# invalid bytes must not end up in the column.
SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
DROP TABLE t1;
# The same bytes written with a character set introducer must be rejected
# with an error rather than accepted as a string literal.
--error ER_INVALID_CHARACTER_STRING
SELECT _eucjpms 0x8EA0;


--echo #
--echo # End of 10.0 tests
--echo #
20 changes: 20 additions & 0 deletions mysql-test/t/ctype_ujis.test
Original file line number Diff line number Diff line change
Expand Up @@ -1349,3 +1349,23 @@ set collation_connection=ujis_bin;
--echo #
--echo # End of 5.6 tests
--echo #


--echo #
--echo # Start of 10.0 tests
--echo #

--echo #
--echo # MDEV-6776 ujis and eucjmps erroneously accept 0x8EA0 as a valid byte sequence
--echo #
# 0x8EA0 is not a well-formed ujis (EUC-JP) sequence: the 0x8E lead byte must
# be followed by a byte in 0xA1..0xDF (half-width katakana), so 0xA0 is invalid.
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ujis);
INSERT INTO t1 VALUES (0x8EA0);
# The expected output is an empty HEX(a) and CHAR_LENGTH(a) = 0, i.e. the
# invalid bytes must not end up in the column.
SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
DROP TABLE t1;
# The same bytes written with a character set introducer must be rejected
# with an error rather than accepted as a string literal.
--error ER_INVALID_CHARACTER_STRING
SELECT _ujis 0x8EA0;


--echo #
--echo # End of 10.0 tests
--echo #
17 changes: 8 additions & 9 deletions strings/ctype-eucjpms.c
Original file line number Diff line number Diff line change
Expand Up @@ -67418,10 +67418,10 @@ my_wc_mb_eucjpms(CHARSET_INFO *cs __attribute__((unused)),

/*
EUCJPMS encoding subcomponents:
[x00-x7F] # ASCII/JIS-Roman (one-byte/character)
[x8E][xA0-xDF] # half-width katakana (two bytes/char)
[x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
[xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
[x00-x7F] # ASCII/JIS-Roman (one-byte/character)
[x8E][xA1-xDF] # half-width katakana (two bytes/char)
[x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
[xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
*/

static
Expand All @@ -67444,15 +67444,15 @@ size_t my_well_formed_len_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
if (b >= (uchar *) end) /* need more bytes */
return (uint) (chbeg - beg); /* unexpected EOL */

if (ch == 0x8E) /* [x8E][xA0-xDF] */
if (iseucjpms_ss2(ch)) /* [x8E][xA1-xDF] */
{
if (*b >= 0xA0 && *b <= 0xDF)
if (iskata(*b))
continue;
*error=1;
return (uint) (chbeg - beg); /* invalid sequence */
}

if (ch == 0x8F) /* [x8F][xA1-xFE][xA1-xFE] */
if (iseucjpms_ss3(ch)) /* [x8F][xA1-xFE][xA1-xFE] */
{
ch= *b++;
if (b >= (uchar*) end)
Expand All @@ -67462,8 +67462,7 @@ size_t my_well_formed_len_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
}
}

if (ch >= 0xA1 && ch <= 0xFE &&
*b >= 0xA1 && *b <= 0xFE) /* [xA1-xFE][xA1-xFE] */
if (iseucjpms(ch) && iseucjpms(*b)) /* [xA1-xFE][xA1-xFE] */
continue;
*error=1;
return (size_t) (chbeg - beg); /* invalid sequence */
Expand Down
17 changes: 8 additions & 9 deletions strings/ctype-ujis.c
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,10 @@ static uint mbcharlen_ujis(CHARSET_INFO *cs __attribute__((unused)),uint c)

/*
EUC-JP encoding subcomponents:
[x00-x7F] # ASCII/JIS-Roman (one-byte/character)
[x8E][xA0-xDF] # half-width katakana (two bytes/char)
[x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
[xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
[x00-x7F] # ASCII/JIS-Roman (one-byte/character)
[x8E][xA1-xDF] # half-width katakana (two bytes/char)
[x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990 (three bytes/char)
[xA1-xFE][xA1-xFE] # JIS X 0208:1997 (two bytes/char)
*/

static
Expand All @@ -231,15 +231,15 @@ size_t my_well_formed_len_ujis(CHARSET_INFO *cs __attribute__((unused)),
return (size_t) (chbeg - beg); /* unexpected EOL */
}

if (ch == 0x8E) /* [x8E][xA0-xDF] */
if (isujis_ss2(ch)) /* [x8E][xA1-xDF] */
{
if (*b >= 0xA0 && *b <= 0xDF)
if (iskata(*b))
continue;
*error= 1;
return (size_t) (chbeg - beg); /* invalid sequence */
}

if (ch == 0x8F) /* [x8F][xA1-xFE][xA1-xFE] */
if (isujis_ss3(ch)) /* [x8F][xA1-xFE][xA1-xFE] */
{
ch= *b++;
if (b >= (uchar*) end)
Expand All @@ -249,8 +249,7 @@ size_t my_well_formed_len_ujis(CHARSET_INFO *cs __attribute__((unused)),
}
}

if (ch >= 0xA1 && ch <= 0xFE &&
*b >= 0xA1 && *b <= 0xFE) /* [xA1-xFE][xA1-xFE] */
if (isujis(ch) && isujis(*b)) /* [xA1-xFE][xA1-xFE] */
continue;
*error= 1;
return (size_t) (chbeg - beg); /* invalid sequence */
Expand Down

0 comments on commit 3416fac

Please sign in to comment.