Skip to content
Permalink
Browse files
MDEV-8844 Unreadable control characters printed as is in warnings
  • Loading branch information
abarkov committed Dec 6, 2019
1 parent 0044565 commit 3c6065a
Show file tree
Hide file tree
Showing 28 changed files with 517 additions and 75 deletions.
@@ -45,6 +45,19 @@ extern "C" {

#define MY_CS_REPLACEMENT_CHARACTER 0xFFFD

/**
Maximum character length of a string produced by wc_to_printable().
Note, wc_to_printable() is currently limited to BMP.
One non-printable or non-convertible character can produce a string
with at most 5 characters: \hhhh (a backslash followed by four
hexadecimal digits).
If we ever modify wc_to_printable() to support supplementary characters,
e.g. \+hhhhhh, this constant should be changed to 8.
Note, maximum octet length of a wc_to_printable() result can be calculated
as: (MY_CS_PRINTABLE_CHAR_LENGTH*cs->mbminlen).
*/
#define MY_CS_PRINTABLE_CHAR_LENGTH 5


/*
On i386 we store Unicode->CS conversion tables for
some character sets using Big-endian order,
@@ -740,6 +753,9 @@ int my_wc_mb_bin(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
int my_mb_ctype_8bit(CHARSET_INFO *,int *, const uchar *,const uchar *);
int my_mb_ctype_mb(CHARSET_INFO *,int *, const uchar *,const uchar *);

/*
NOTE(review): presumably the generic wc_to_printable() implementation,
writing a printable form of wc into the buffer [s, e) -- confirm against
the definition, which is not visible in this header.
*/
int my_wc_to_printable_generic(CHARSET_INFO *cs, my_wc_t wc,
uchar *s, uchar *e);

size_t my_scan_8bit(CHARSET_INFO *cs, const char *b, const char *e, int sq);

size_t my_snprintf_8bit(CHARSET_INFO *, char *to, size_t n,
@@ -3174,3 +3174,29 @@ DROP TABLE t1;
#
# End of 10.1 tests
#
#
# Start of 10.5 tests
#
#
# MDEV-8844 Unreadable control characters printed as is in warnings
#
SET NAMES binary;
CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET latin1, UNIQUE(a));
INSERT INTO t1 VALUES (0x61000162FF);
INSERT INTO t1 VALUES (0x61000162FF);
ERROR 23000: Duplicate entry 'a\0000\0001bÿ' for key 'a'
INSERT IGNORE INTO t1 VALUES (0x61000162FF);
Warnings:
Warning 1062 Duplicate entry 'a\0000\0001bÿ' for key 'a'
DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8, UNIQUE(a));
INSERT INTO t1 VALUES (_latin1 0x61000162FF);
INSERT INTO t1 VALUES (_latin1 0x61000162FF);
ERROR 23000: Duplicate entry 'a\0000\0001bÿ' for key 'a'
INSERT IGNORE INTO t1 VALUES (_latin1 0x61000162FF);
Warnings:
Warning 1062 Duplicate entry 'a\0000\0001bÿ' for key 'a'
DROP TABLE t1;
#
# End of 10.5 tests
#
@@ -77,3 +77,31 @@ DROP TABLE t1;
--echo #
--echo # End of 10.1 tests
--echo #

--echo #
--echo # Start of 10.5 tests
--echo #

--echo #
--echo # MDEV-8844 Unreadable control characters printed as is in warnings
--echo #

# The value 0x61000162FF is the byte sequence 'a', 0x00, 0x01, 'b', 0xFF.
# The duplicate-key error and the INSERT IGNORE warning are expected to show
# the two control bytes escaped as \0000 and \0001, while 'a', 'b' and
# 0xFF (latin1 y with diaeresis) are printed as is.
SET NAMES binary;
CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET latin1, UNIQUE(a));
INSERT INTO t1 VALUES (0x61000162FF);
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (0x61000162FF);
INSERT IGNORE INTO t1 VALUES (0x61000162FF);
DROP TABLE t1;

# Same bytes, this time supplied as a _latin1 literal and stored in a utf8
# column; the message is expected to be escaped the same way.
CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8, UNIQUE(a));
INSERT INTO t1 VALUES (_latin1 0x61000162FF);
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (_latin1 0x61000162FF);
INSERT IGNORE INTO t1 VALUES (_latin1 0x61000162FF);
DROP TABLE t1;

--echo #
--echo # End of 10.5 tests
--echo #

@@ -8907,5 +8907,44 @@ t1 CREATE TABLE `t1` (
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
#
# MDEV-8844 Unreadable control characters printed as is in warnings
#
SET NAMES latin1;
SELECT CAST(_latin1 0x610062 AS INT);
CAST(_latin1 0x610062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\0000b'
SELECT CAST(_latin1 0x610162 AS INT);
CAST(_latin1 0x610162 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\0001b'
SELECT CAST(_latin1 0x611F62 AS INT);
CAST(_latin1 0x611F62 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\001Fb'
SELECT CAST(_latin1 0x617F62 AS INT);
CAST(_latin1 0x617F62 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\007Fb'
SELECT CAST(_latin1 0x612062 AS INT);
CAST(_latin1 0x612062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a b'
SELECT CAST(_latin1 0x617E62 AS INT);
CAST(_latin1 0x617E62 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a~b'
SELECT CAST(_latin1 0x61FF62 AS INT);
CAST(_latin1 0x61FF62 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a�b'
#
# End of 10.5 tests
#
@@ -456,6 +456,22 @@ CREATE OR REPLACE TABLE t1 AS SELECT CAST(1 AS BINARY), CAST(@a AS BINARY), CAST
SHOW CREATE TABLE t1;
DROP TABLE t1;


--echo #
--echo # MDEV-8844 Unreadable control characters printed as is in warnings
--echo #
SET NAMES latin1;
# Control characters (0x00, 0x01, 0x1F, 0x7F) between 'a' and 'b':
# the truncation warning is expected to show them escaped as \hhhh.
SELECT CAST(_latin1 0x610062 AS INT);
SELECT CAST(_latin1 0x610162 AS INT);
SELECT CAST(_latin1 0x611F62 AS INT);
SELECT CAST(_latin1 0x617F62 AS INT);
# Normal (printable) characters (0x20, 0x7E, 0xFF) between 'a' and 'b':
# the warning is expected to show them as is, without escaping.
SELECT CAST(_latin1 0x612062 AS INT);
SELECT CAST(_latin1 0x617E62 AS INT);
SELECT CAST(_latin1 0x61FF62 AS INT);


--echo #
--echo # End of 10.5 tests
--echo #
@@ -6431,3 +6431,45 @@ SET NAMES utf8;
#
# End of 10.4 tests
#
#
# Start of 10.5 tests
#
#
# MDEV-8844 Unreadable control characters printed as is in warnings
#
# control
SELECT CAST(_ucs2 0x006100000062 AS INT);
CAST(_ucs2 0x006100000062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\0000b'
SELECT CAST(_ucs2 0x006100010062 AS INT);
CAST(_ucs2 0x006100010062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\0001b'
# surrogate halfs
SELECT CAST(_ucs2 0x0061D8000062 AS INT);
CAST(_ucs2 0x0061D8000062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\D800b'
SELECT CAST(_ucs2 0x0061DFFF0062 AS INT);
CAST(_ucs2 0x0061DFFF0062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\DFFFb'
# normal characters
SELECT CAST(_ucs2 0x0061D7000062 AS INT);
CAST(_ucs2 0x0061D7000062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a휀b'
SELECT CAST(_ucs2 0x0061E0030062 AS INT);
CAST(_ucs2 0x0061E0030062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'ab'
#
# End of 10.5 tests
#
@@ -1124,3 +1124,28 @@ SET NAMES utf8;
--echo #
--echo # End of 10.4 tests
--echo #


--echo #
--echo # Start of 10.5 tests
--echo #

--echo #
--echo # MDEV-8844 Unreadable control characters printed as is in warnings
--echo #

# Each literal below is 'a', one ucs2 code unit under test, then 'b'.

# U+0000 and U+0001: expected to be escaped as \0000 and \0001.
--echo # control
SELECT CAST(_ucs2 0x006100000062 AS INT);
SELECT CAST(_ucs2 0x006100010062 AS INT);

# U+D800 and U+DFFF, the first and the last surrogate code points:
# expected to be escaped as \D800 and \DFFF.
--echo # surrogate halfs
SELECT CAST(_ucs2 0x0061D8000062 AS INT);
SELECT CAST(_ucs2 0x0061DFFF0062 AS INT);

# U+D700 and U+E003, code points on either side of the surrogate range:
# expected to be printed as is, without escaping.
--echo # normal characters
SELECT CAST(_ucs2 0x0061D7000062 AS INT);
SELECT CAST(_ucs2 0x0061E0030062 AS INT);

--echo #
--echo # End of 10.5 tests
--echo #
@@ -2814,3 +2814,18 @@ SET STORAGE_ENGINE=Default;
#
# End of 10.2 tests
#
#
# Start of 10.5 tests
#
#
# MDEV-8844 Unreadable control characters printed as is in warnings
#
SET NAMES utf8;
SELECT CAST(_utf16 0x0061D83DDE0E0062 AS INT);
CAST(_utf16 0x0061D83DDE0E0062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a?b'
#
# End of 10.5 tests
#
@@ -934,3 +934,23 @@ let $coll_pad='utf16_bin';
--echo #
--echo # End of 10.2 tests
--echo #


--echo #
--echo # Start of 10.5 tests
--echo #

--echo #
--echo # MDEV-8844 Unreadable control characters printed as is in warnings
--echo #

SET NAMES utf8;
# Make sure surrogate halves (when they are part of a full utf16 character)
# are not escaped, and that the entire utf16 character, consisting of two
# surrogate halves (one surrogate pair, U+1F60E here), is replaced with a
# single question mark.
SELECT CAST(_utf16 0x0061D83DDE0E0062 AS INT);


--echo #
--echo # End of 10.5 tests
--echo #
@@ -5407,18 +5407,21 @@ DROP TABLE t1;
#
# Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
#
# Emulate utf8 client erroneously started with --default-character-set=latin1,
# # as in the bug report. EXPLAIN output should still be pretty readable
SET NAMES latin1;
EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ' AS u;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1003 select 'abcdÁÂÃÄÅ' AS `abcdÁÂÃÄÅ`,_latin1'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÁÂÃÄÅ`,_utf8'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `u`
Note 1003 select 'abcd�\0081ÂÃÄÅ' AS `abcd�\0081ÂÃÄÅ`,_latin1'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcd�\0081ÂÃÄÅ`,_utf8'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `u`
# Test normal utf8
SET NAMES utf8;
EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1003 select 'abcdÁÂÃÄÅ' AS `abcdÁÂÃÄÅ`,_latin1'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÁÂÃÄÅ`,_utf8'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÁÂÃÄÅ`
Note 1003 select 'abcdÁÂÃÄÅ' AS `abcdÁÂÃÄÅ`,_latin1'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÃ\0081ÂÃÄÅ`,_utf8'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÁÂÃÄÅ`
#
# Bug#11750518 41090: ORDER BY TRUNCATES GROUP_CONCAT RESULT
#
@@ -11348,5 +11351,72 @@ SELECT uuid()>'';
uuid()>''
1
#
# MDEV-8844 Unreadable control characters printed as is in warnings
#
SET NAMES utf8;
# control, part1
SELECT CAST(_utf8 0x610062 AS INT);
CAST(_utf8 0x610062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\0000b'
SELECT CAST(_utf8 0x610162 AS INT);
CAST(_utf8 0x610162 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\0001b'
SELECT CAST(_utf8 0x611F62 AS INT);
CAST(_utf8 0x611F62 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\001Fb'
# control, part2: U+0080..U+009F
SELECT CAST(_utf8 0x617F62 AS INT);
CAST(_utf8 0x617F62 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\007Fb'
SELECT CAST(_utf8 0x61C28062 AS INT);
CAST(_utf8 0x61C28062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\0080b'
SELECT CAST(_utf8 0x61C29F62 AS INT);
CAST(_utf8 0x61C29F62 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a\009Fb'
# normal characters
SELECT CAST(_utf8 0x612062 AS INT);
CAST(_utf8 0x612062 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a b'
SELECT CAST(_utf8 0x617E62 AS INT);
CAST(_utf8 0x617E62 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a~b'
SELECT CAST(_utf8 0x61C2BF62 AS INT);
CAST(_utf8 0x61C2BF62 AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'a¿b'
SELECT CAST(_utf8 'ëëë' AS INT);
CAST(_utf8 'ëëë' AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'ëëë'
SELECT CAST(_utf8 'œœœ' AS INT);
CAST(_utf8 'œœœ' AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'œœœ'
SELECT CAST(_utf8 'яяя' AS INT);
CAST(_utf8 'яяя' AS INT)
0
Warnings:
Warning 1292 Truncated incorrect INTEGER value: 'яяя'
#
# End of 10.5 tests
#

0 comments on commit 3c6065a

Please sign in to comment.