Skip to content

Commit 9e7afa7

Browse files
committed
Extra tests for MDEV-30716 Wrong casefolding in xxx_unicode_520_ci for U+0700..U+07FF
New tests display additional information about characters from the BMP range:
- A summary with a COUNT(*) for all distinct combinations of properties telling how the "=" and the "LIKE" predicates compare characters to their LOWER() and UPPER() variants.
- A detailed list of tricky characters for which the "=" and the "LIKE" predicates compare LOWER(c)/UPPER(c) variants as not equal to just "c".

Tricky characters include:
- Turkish letters: ı - small dotless letter i
- Croatian letters: precombined contractions for Dž, Dz, Lj, Nj
- Units of measurement: Ω,K,Å (Ohm, Kelvin, Angstrom). These look very similar to the Greek letter Omega, the Latin letter K, and the Swedish/Finnish letter A with a ring above.
1 parent 929c2e0 commit 9e7afa7

18 files changed

+746
-8
lines changed

mysql-test/include/ctype_unicode_casefold_bmp.inc

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,58 @@ FROM
1313

1414
SELECT COLLATION(c) FROM v_bmp LIMIT 1;
1515

16+
--echo #
17+
--echo # BMP character summary
18+
--echo #
19+
20+
SELECT
21+
BINARY(c)=BINARY(LOWER(c)) AS `Bc=BLc`,
22+
BINARY(c)=BINARY(UPPER(c)) AS `Bc=BUc`,
23+
c=LOWER(c) AS `c=L(c)`,
24+
c=UPPER(c) AS `c=U(c)`,
25+
c LIKE LOWER(c) AS `c~~L(c)`,
26+
c LIKE UPPER(c) AS `c~~U(c)`,
27+
COUNT(*),
28+
IF(BINARY(c)=BINARY(LOWER(c)) AND BINARY(c)=BINARY(UPPER(c)),'',
29+
LEFT(GROUP_CONCAT(c ORDER BY codepoint), 20)) AS example
30+
FROM v_bmp
31+
GROUP BY 1, 2, 3, 4, 5, 6;
32+
33+
34+
--echo #
35+
--echo # BMP characters with upper/lower mapping
36+
--echo #
37+
1638
SELECT
1739
codepoint_hex4,
1840
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)),
1941
HEX(CAST(UPPER(c) AS CHAR CHARACTER SET ucs2))
2042
FROM v_bmp
2143
WHERE BINARY(c)<>BINARY(LOWER(c)) OR BINARY(c)<>BINARY(UPPER(c));
2244

45+
--echo #
46+
--echo # BMP characters with a non-trivial upper/lower mapping
47+
--echo #
48+
49+
SELECT
50+
codepoint_hex4 as hex4,
51+
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_l,
52+
HEX(CAST(UPPER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_u,
53+
c=LOWER(c) AS `c=L`,
54+
c=UPPER(c) AS `c=U`,
55+
c LIKE LOWER(c) AS `c~~L`,
56+
c LIKE UPPER(c) AS `c~~U`,
57+
c,
58+
LOWER(c) AS `L(c)`,
59+
UPPER(c) AS `U(c)`
60+
FROM v_bmp
61+
WHERE NOT (
62+
(BINARY(c)=BINARY(LOWER(c)) OR BINARY(c)=BINARY(UPPER(c))) AND
63+
c = LOWER(c) AND
64+
c = UPPER(c) AND
65+
c LIKE UPPER(c) AND
66+
c LIKE LOWER(c)
67+
);
68+
69+
2370
DROP VIEW v_bmp;

mysql-test/main/ctype_ucs2_general_ci_casefold.result

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# MDEV-30716 Wrong casefolding in xxx_unicode_520_ci for U+0700..U+07FF
66
#
7-
SET collation_connection=ucs2_general_ci;
7+
SET collation_connection=ucs2_general_ci, @@character_set_results=utf8mb3;
88
EXECUTE IMMEDIATE SFORMAT('
99
CREATE VIEW v_bmp AS
1010
SELECT
@@ -16,6 +16,30 @@ FROM
1616
SELECT COLLATION(c) FROM v_bmp LIMIT 1;
1717
COLLATION(c)
1818
ucs2_general_ci
19+
#
20+
# BMP character summary
21+
#
22+
SELECT
23+
BINARY(c)=BINARY(LOWER(c)) AS `Bc=BLc`,
24+
BINARY(c)=BINARY(UPPER(c)) AS `Bc=BUc`,
25+
c=LOWER(c) AS `c=L(c)`,
26+
c=UPPER(c) AS `c=U(c)`,
27+
c LIKE LOWER(c) AS `c~~L(c)`,
28+
c LIKE UPPER(c) AS `c~~U(c)`,
29+
COUNT(*),
30+
IF(BINARY(c)=BINARY(LOWER(c)) AND BINARY(c)=BINARY(UPPER(c)),'',
31+
LEFT(GROUP_CONCAT(c ORDER BY codepoint), 20)) AS example
32+
FROM v_bmp
33+
GROUP BY 1, 2, 3, 4, 5, 6;
34+
Bc=BLc Bc=BUc c=L(c) c=U(c) c~~L(c) c~~U(c) COUNT(*) example
35+
0 0 1 1 1 1 4 Dž,Lj,Nj,Dz
36+
0 1 0 1 0 1 3 Ω,K,Å
37+
0 1 1 1 1 1 689 A,B,C,D,E,F,G,H,I,J,
38+
1 0 1 1 1 1 702 a,b,c,d,e,f,g,h,i,j,
39+
1 1 1 1 1 1 64138
40+
#
41+
# BMP characters with upper/lower mapping
42+
#
1943
SELECT
2044
codepoint_hex4,
2145
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)),
@@ -1421,6 +1445,36 @@ FF57 FF57 FF37
14211445
FF58 FF58 FF38
14221446
FF59 FF59 FF39
14231447
FF5A FF5A FF3A
1448+
#
1449+
# BMP characters with a non-trivial upper/lower mapping
1450+
#
1451+
SELECT
1452+
codepoint_hex4 as hex4,
1453+
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_l,
1454+
HEX(CAST(UPPER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_u,
1455+
c=LOWER(c) AS `c=L`,
1456+
c=UPPER(c) AS `c=U`,
1457+
c LIKE LOWER(c) AS `c~~L`,
1458+
c LIKE UPPER(c) AS `c~~U`,
1459+
c,
1460+
LOWER(c) AS `L(c)`,
1461+
UPPER(c) AS `U(c)`
1462+
FROM v_bmp
1463+
WHERE NOT (
1464+
(BINARY(c)=BINARY(LOWER(c)) OR BINARY(c)=BINARY(UPPER(c))) AND
1465+
c = LOWER(c) AND
1466+
c = UPPER(c) AND
1467+
c LIKE UPPER(c) AND
1468+
c LIKE LOWER(c)
1469+
);
1470+
hex4 hex4_l hex4_u c=L c=U c~~L c~~U c L(c) U(c)
1471+
01C5 01C6 01C4 1 1 1 1 Dž dž DŽ
1472+
01C8 01C9 01C7 1 1 1 1 Lj lj LJ
1473+
01CB 01CC 01CA 1 1 1 1 Nj nj NJ
1474+
01F2 01F3 01F1 1 1 1 1 Dz dz DZ
1475+
2126 03C9 2126 0 1 0 1 Ω ω Ω
1476+
212A 006B 212A 0 1 0 1 K k K
1477+
212B 00E5 212B 0 1 0 1 Å å Å
14241478
DROP VIEW v_bmp;
14251479
#
14261480
# End of 10.7 tests

mysql-test/main/ctype_ucs2_general_ci_casefold.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
--echo # MDEV-30716 Wrong casefolding in xxx_unicode_520_ci for U+0700..U+07FF
77
--echo #
88

9-
SET collation_connection=ucs2_general_ci;
9+
SET collation_connection=ucs2_general_ci, @@character_set_results=utf8mb3;
1010
--source include/ctype_unicode_casefold_bmp.inc
1111

1212
--echo #

mysql-test/main/ctype_ucs2_general_mysql500_ci_casefold.result

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# MDEV-30716 Wrong casefolding in xxx_unicode_520_ci for U+0700..U+07FF
66
#
7-
SET @@collation_connection=ucs2_general_mysql500_ci;
7+
SET @@collation_connection=ucs2_general_mysql500_ci, @@character_set_results=utf8mb3;
88
EXECUTE IMMEDIATE SFORMAT('
99
CREATE VIEW v_bmp AS
1010
SELECT
@@ -16,6 +16,30 @@ FROM
1616
SELECT COLLATION(c) FROM v_bmp LIMIT 1;
1717
COLLATION(c)
1818
ucs2_general_mysql500_ci
19+
#
20+
# BMP character summary
21+
#
22+
SELECT
23+
BINARY(c)=BINARY(LOWER(c)) AS `Bc=BLc`,
24+
BINARY(c)=BINARY(UPPER(c)) AS `Bc=BUc`,
25+
c=LOWER(c) AS `c=L(c)`,
26+
c=UPPER(c) AS `c=U(c)`,
27+
c LIKE LOWER(c) AS `c~~L(c)`,
28+
c LIKE UPPER(c) AS `c~~U(c)`,
29+
COUNT(*),
30+
IF(BINARY(c)=BINARY(LOWER(c)) AND BINARY(c)=BINARY(UPPER(c)),'',
31+
LEFT(GROUP_CONCAT(c ORDER BY codepoint), 20)) AS example
32+
FROM v_bmp
33+
GROUP BY 1, 2, 3, 4, 5, 6;
34+
Bc=BLc Bc=BUc c=L(c) c=U(c) c~~L(c) c~~U(c) COUNT(*) example
35+
0 0 1 1 1 1 4 Dž,Lj,Nj,Dz
36+
0 1 0 1 0 1 3 Ω,K,Å
37+
0 1 1 1 1 1 689 A,B,C,D,E,F,G,H,I,J,
38+
1 0 1 1 1 1 702 a,b,c,d,e,f,g,h,i,j,
39+
1 1 1 1 1 1 64138
40+
#
41+
# BMP characters with upper/lower mapping
42+
#
1943
SELECT
2044
codepoint_hex4,
2145
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)),
@@ -1421,6 +1445,36 @@ FF57 FF57 FF37
14211445
FF58 FF58 FF38
14221446
FF59 FF59 FF39
14231447
FF5A FF5A FF3A
1448+
#
1449+
# BMP characters with a non-trivial upper/lower mapping
1450+
#
1451+
SELECT
1452+
codepoint_hex4 as hex4,
1453+
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_l,
1454+
HEX(CAST(UPPER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_u,
1455+
c=LOWER(c) AS `c=L`,
1456+
c=UPPER(c) AS `c=U`,
1457+
c LIKE LOWER(c) AS `c~~L`,
1458+
c LIKE UPPER(c) AS `c~~U`,
1459+
c,
1460+
LOWER(c) AS `L(c)`,
1461+
UPPER(c) AS `U(c)`
1462+
FROM v_bmp
1463+
WHERE NOT (
1464+
(BINARY(c)=BINARY(LOWER(c)) OR BINARY(c)=BINARY(UPPER(c))) AND
1465+
c = LOWER(c) AND
1466+
c = UPPER(c) AND
1467+
c LIKE UPPER(c) AND
1468+
c LIKE LOWER(c)
1469+
);
1470+
hex4 hex4_l hex4_u c=L c=U c~~L c~~U c L(c) U(c)
1471+
01C5 01C6 01C4 1 1 1 1 Dž dž DŽ
1472+
01C8 01C9 01C7 1 1 1 1 Lj lj LJ
1473+
01CB 01CC 01CA 1 1 1 1 Nj nj NJ
1474+
01F2 01F3 01F1 1 1 1 1 Dz dz DZ
1475+
2126 03C9 2126 0 1 0 1 Ω ω Ω
1476+
212A 006B 212A 0 1 0 1 K k K
1477+
212B 00E5 212B 0 1 0 1 Å å Å
14241478
DROP VIEW v_bmp;
14251479
#
14261480
# End of 10.7 tests

mysql-test/main/ctype_ucs2_general_mysql500_ci_casefold.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
--echo # MDEV-30716 Wrong casefolding in xxx_unicode_520_ci for U+0700..U+07FF
77
--echo #
88

9-
SET @@collation_connection=ucs2_general_mysql500_ci;
9+
SET @@collation_connection=ucs2_general_mysql500_ci, @@character_set_results=utf8mb3;
1010
--source include/ctype_unicode_casefold_bmp.inc
1111

1212

mysql-test/main/ctype_ucs2_turkish_ci_casefold.result

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# MDEV-30716 Wrong casefolding in xxx_unicode_520_ci for U+0700..U+07FF
66
#
7-
SET @@collation_connection=ucs2_turkish_ci;
7+
SET @@collation_connection=ucs2_turkish_ci, @@character_set_results=utf8mb3;
88
EXECUTE IMMEDIATE SFORMAT('
99
CREATE VIEW v_bmp AS
1010
SELECT
@@ -16,6 +16,30 @@ FROM
1616
SELECT COLLATION(c) FROM v_bmp LIMIT 1;
1717
COLLATION(c)
1818
ucs2_turkish_ci
19+
#
20+
# BMP character summary
21+
#
22+
SELECT
23+
BINARY(c)=BINARY(LOWER(c)) AS `Bc=BLc`,
24+
BINARY(c)=BINARY(UPPER(c)) AS `Bc=BUc`,
25+
c=LOWER(c) AS `c=L(c)`,
26+
c=UPPER(c) AS `c=U(c)`,
27+
c LIKE LOWER(c) AS `c~~L(c)`,
28+
c LIKE UPPER(c) AS `c~~U(c)`,
29+
COUNT(*),
30+
IF(BINARY(c)=BINARY(LOWER(c)) AND BINARY(c)=BINARY(UPPER(c)),'',
31+
LEFT(GROUP_CONCAT(c ORDER BY codepoint), 20)) AS example
32+
FROM v_bmp
33+
GROUP BY 1, 2, 3, 4, 5, 6;
34+
Bc=BLc Bc=BUc c=L(c) c=U(c) c~~L(c) c~~U(c) COUNT(*) example
35+
0 0 1 1 1 1 4 Dž,Lj,Nj,Dz
36+
0 1 1 1 1 1 692 A,B,C,D,E,F,G,H,I,J,
37+
1 0 1 0 1 0 1 ͅ
38+
1 0 1 1 1 1 701 a,b,c,d,e,f,g,h,i,j,
39+
1 1 1 1 1 1 64138
40+
#
41+
# BMP characters with upper/lower mapping
42+
#
1943
SELECT
2044
codepoint_hex4,
2145
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)),
@@ -1421,6 +1445,34 @@ FF57 FF57 FF37
14211445
FF58 FF58 FF38
14221446
FF59 FF59 FF39
14231447
FF5A FF5A FF3A
1448+
#
1449+
# BMP characters with a non-trivial upper/lower mapping
1450+
#
1451+
SELECT
1452+
codepoint_hex4 as hex4,
1453+
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_l,
1454+
HEX(CAST(UPPER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_u,
1455+
c=LOWER(c) AS `c=L`,
1456+
c=UPPER(c) AS `c=U`,
1457+
c LIKE LOWER(c) AS `c~~L`,
1458+
c LIKE UPPER(c) AS `c~~U`,
1459+
c,
1460+
LOWER(c) AS `L(c)`,
1461+
UPPER(c) AS `U(c)`
1462+
FROM v_bmp
1463+
WHERE NOT (
1464+
(BINARY(c)=BINARY(LOWER(c)) OR BINARY(c)=BINARY(UPPER(c))) AND
1465+
c = LOWER(c) AND
1466+
c = UPPER(c) AND
1467+
c LIKE UPPER(c) AND
1468+
c LIKE LOWER(c)
1469+
);
1470+
hex4 hex4_l hex4_u c=L c=U c~~L c~~U c L(c) U(c)
1471+
01C5 01C6 01C4 1 1 1 1 Dž dž DŽ
1472+
01C8 01C9 01C7 1 1 1 1 Lj lj LJ
1473+
01CB 01CC 01CA 1 1 1 1 Nj nj NJ
1474+
01F2 01F3 01F1 1 1 1 1 Dz dz DZ
1475+
0345 0345 0399 1 0 1 0 ͅ ͅ Ι
14241476
DROP VIEW v_bmp;
14251477
#
14261478
# End of 10.7 tests

mysql-test/main/ctype_ucs2_turkish_ci_casefold.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
--echo # MDEV-30716 Wrong casefolding in xxx_unicode_520_ci for U+0700..U+07FF
77
--echo #
88

9-
SET @@collation_connection=ucs2_turkish_ci;
9+
SET @@collation_connection=ucs2_turkish_ci, @@character_set_results=utf8mb3;
1010
--source include/ctype_unicode_casefold_bmp.inc
1111

1212
--echo #

mysql-test/main/ctype_ucs2_unicode_520_ci_casefold.result

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#
55
# MDEV-30716 Wrong casefolding in xxx_unicode_520_ci for U+0700..U+07FF
66
#
7-
SET @@collation_connection=ucs2_unicode_520_ci;
7+
SET @@collation_connection=ucs2_unicode_520_ci, @@character_set_results=utf8mb3;
88
EXECUTE IMMEDIATE SFORMAT('
99
CREATE VIEW v_bmp AS
1010
SELECT
@@ -16,6 +16,30 @@ FROM
1616
SELECT COLLATION(c) FROM v_bmp LIMIT 1;
1717
COLLATION(c)
1818
ucs2_unicode_520_ci
19+
#
20+
# BMP character summary
21+
#
22+
SELECT
23+
BINARY(c)=BINARY(LOWER(c)) AS `Bc=BLc`,
24+
BINARY(c)=BINARY(UPPER(c)) AS `Bc=BUc`,
25+
c=LOWER(c) AS `c=L(c)`,
26+
c=UPPER(c) AS `c=U(c)`,
27+
c LIKE LOWER(c) AS `c~~L(c)`,
28+
c LIKE UPPER(c) AS `c~~U(c)`,
29+
COUNT(*),
30+
IF(BINARY(c)=BINARY(LOWER(c)) AND BINARY(c)=BINARY(UPPER(c)),'',
31+
LEFT(GROUP_CONCAT(c ORDER BY codepoint), 20)) AS example
32+
FROM v_bmp
33+
GROUP BY 1, 2, 3, 4, 5, 6;
34+
Bc=BLc Bc=BUc c=L(c) c=U(c) c~~L(c) c~~U(c) COUNT(*) example
35+
0 0 1 1 1 1 4 Dž,Lj,Nj,Dz
36+
0 1 1 1 1 1 985 A,B,C,D,E,F,G,H,I,J,
37+
1 0 1 0 1 0 2 ı,ͅ
38+
1 0 1 1 1 1 991 a,b,c,d,e,f,g,h,i,j,
39+
1 1 1 1 1 1 63554
40+
#
41+
# BMP characters with upper/lower mapping
42+
#
1943
SELECT
2044
codepoint_hex4,
2145
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)),
@@ -2005,6 +2029,35 @@ FF57 FF57 FF37
20052029
FF58 FF58 FF38
20062030
FF59 FF59 FF39
20072031
FF5A FF5A FF3A
2032+
#
2033+
# BMP characters with a non-trivial upper/lower mapping
2034+
#
2035+
SELECT
2036+
codepoint_hex4 as hex4,
2037+
HEX(CAST(LOWER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_l,
2038+
HEX(CAST(UPPER(c) AS CHAR CHARACTER SET ucs2)) AS hex4_u,
2039+
c=LOWER(c) AS `c=L`,
2040+
c=UPPER(c) AS `c=U`,
2041+
c LIKE LOWER(c) AS `c~~L`,
2042+
c LIKE UPPER(c) AS `c~~U`,
2043+
c,
2044+
LOWER(c) AS `L(c)`,
2045+
UPPER(c) AS `U(c)`
2046+
FROM v_bmp
2047+
WHERE NOT (
2048+
(BINARY(c)=BINARY(LOWER(c)) OR BINARY(c)=BINARY(UPPER(c))) AND
2049+
c = LOWER(c) AND
2050+
c = UPPER(c) AND
2051+
c LIKE UPPER(c) AND
2052+
c LIKE LOWER(c)
2053+
);
2054+
hex4 hex4_l hex4_u c=L c=U c~~L c~~U c L(c) U(c)
2055+
0131 0131 0049 1 0 1 0 ı ı I
2056+
01C5 01C6 01C4 1 1 1 1 Dž dž DŽ
2057+
01C8 01C9 01C7 1 1 1 1 Lj lj LJ
2058+
01CB 01CC 01CA 1 1 1 1 Nj nj NJ
2059+
01F2 01F3 01F1 1 1 1 1 Dz dz DZ
2060+
0345 0345 0399 1 0 1 0 ͅ ͅ Ι
20082061
DROP VIEW v_bmp;
20092062
#
20102063
# End of 10.7 tests

mysql-test/main/ctype_ucs2_unicode_520_ci_casefold.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
--echo # MDEV-30716 Wrong casefolding in xxx_unicode_520_ci for U+0700..U+07FF
77
--echo #
88

9-
SET @@collation_connection=ucs2_unicode_520_ci;
9+
SET @@collation_connection=ucs2_unicode_520_ci, @@character_set_results=utf8mb3;
1010
--source include/ctype_unicode_casefold_bmp.inc
1111

1212
--echo #

0 commit comments

Comments
 (0)