Skip to content

Commit 8286bcd

Browse files
author
Alexander Barkov
committed
MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
1 parent 391fddf commit 8286bcd

File tree

5 files changed

+96
-4
lines changed

5 files changed

+96
-4
lines changed

mysql-test/r/ctype_latin1.result

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7660,5 +7660,60 @@ DROP FUNCTION iswellformed;
76607660
DROP TABLE allbytes;
76617661
# End of ctype_backslash.inc
76627662
#
7663+
# MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
7664+
#
7665+
SET NAMES utf8, character_set_connection=latin1;
7666+
SELECT '�';
7667+
?
7668+
?
7669+
SELECT HEX('�');
7670+
HEX('�')
7671+
3F
7672+
SELECT HEX(CAST('�' AS CHAR CHARACTER SET utf8));
7673+
HEX(CAST('�' AS CHAR CHARACTER SET utf8))
7674+
3F
7675+
SELECT HEX(CAST('�' AS CHAR CHARACTER SET latin1));
7676+
HEX(CAST('�' AS CHAR CHARACTER SET latin1))
7677+
3F
7678+
SELECT HEX(CONVERT('�' USING utf8));
7679+
HEX(CONVERT('�' USING utf8))
7680+
3F
7681+
SELECT HEX(CONVERT('�' USING latin1));
7682+
HEX(CONVERT('�' USING latin1))
7683+
3F
7684+
SELECT '�x';
7685+
?x
7686+
?x
7687+
SELECT HEX('�x');
7688+
HEX('�x')
7689+
3F78
7690+
SELECT HEX(CAST('�x' AS CHAR CHARACTER SET utf8));
7691+
HEX(CAST('�x' AS CHAR CHARACTER SET utf8))
7692+
3F78
7693+
SELECT HEX(CAST('�x' AS CHAR CHARACTER SET latin1));
7694+
HEX(CAST('�x' AS CHAR CHARACTER SET latin1))
7695+
3F78
7696+
SELECT HEX(CONVERT('�x' USING utf8));
7697+
HEX(CONVERT('�x' USING utf8))
7698+
3F78
7699+
SELECT HEX(CONVERT('�x' USING latin1));
7700+
HEX(CONVERT('�x' USING latin1))
7701+
3F78
7702+
SET NAMES utf8;
7703+
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
7704+
INSERT INTO t1 VALUES ('�'),('�#');
7705+
Warnings:
7706+
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
7707+
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
7708+
SHOW WARNINGS;
7709+
Level Code Message
7710+
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
7711+
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
7712+
SELECT HEX(a),a FROM t1;
7713+
HEX(a) a
7714+
3F ?
7715+
3F23 ?#
7716+
DROP TABLE t1;
7717+
#
76637718
# End of 10.0 tests
76647719
#

mysql-test/suite/sys_vars/r/character_set_client_func.result

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ SET @@session.character_set_client = utf8;
3030
INSERT INTO t1 values('�');
3131
SELECT hex(a),CHAR_LENGTH(a) FROM t1;
3232
hex(a) CHAR_LENGTH(a)
33-
03 1
33+
033F 2
3434
DELETE FROM t1;
3535
DROP TABLE IF EXISTS t1;
3636
SET @@global.character_set_client = @global_character_set_client;

mysql-test/t/ctype_latin1.test

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,29 @@ set names latin1;
210210
let $ctype_unescape_combinations=selected;
211211
--source include/ctype_unescape.inc
212212

213+
--echo #
214+
--echo # MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
215+
--echo #
216+
SET NAMES utf8, character_set_connection=latin1;
217+
SELECT '�';
218+
SELECT HEX('�');
219+
SELECT HEX(CAST('�' AS CHAR CHARACTER SET utf8));
220+
SELECT HEX(CAST('�' AS CHAR CHARACTER SET latin1));
221+
SELECT HEX(CONVERT('�' USING utf8));
222+
SELECT HEX(CONVERT('�' USING latin1));
223+
SELECT '�x';
224+
SELECT HEX('�x');
225+
SELECT HEX(CAST('�x' AS CHAR CHARACTER SET utf8));
226+
SELECT HEX(CAST('�x' AS CHAR CHARACTER SET latin1));
227+
SELECT HEX(CONVERT('�x' USING utf8));
228+
SELECT HEX(CONVERT('�x' USING latin1));
229+
SET NAMES utf8;
230+
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
231+
INSERT INTO t1 VALUES ('�'),('�#');
232+
SHOW WARNINGS;
233+
SELECT HEX(a),a FROM t1;
234+
DROP TABLE t1;
235+
213236
--echo #
214237
--echo # End of 10.0 tests
215238
--echo #

sql/sql_string.cc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,8 +1022,15 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
10221022
wc= '?';
10231023
}
10241024
else
1025-
break; // Not enough characters
1026-
1025+
{
1026+
if ((uchar *) from >= from_end)
1027+
break; // End of input: source buffer exhausted
1028+
// Incomplete byte sequence
1029+
if (!*well_formed_error_pos)
1030+
*well_formed_error_pos= from;
1031+
from++;
1032+
wc= '?';
1033+
}
10271034
outp:
10281035
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
10291036
to+= cnvres;

strings/ctype.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1066,7 +1066,14 @@ my_convert_internal(char *to, uint32 to_length,
10661066
wc= '?';
10671067
}
10681068
else
1069-
break; // Not enough characters
1069+
{
1070+
if ((uchar *) from >= from_end)
1071+
break; /* End of input: source buffer exhausted */
1072+
/* Incomplete byte sequence */
1073+
error_count++;
1074+
from++;
1075+
wc= '?';
1076+
}
10701077

10711078
outp:
10721079
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)

0 commit comments

Comments
 (0)