Skip to content

Commit

Permalink
MDEV-30577 Case folding for uca1400 collations is not up to date
Browse files Browse the repository at this point in the history
Adding case folding data for Unicode 14.0.0 to the uca1400 collations.
  • Loading branch information
abarkov committed Apr 18, 2023
1 parent 6075f12 commit c21745d
Show file tree
Hide file tree
Showing 15 changed files with 8,221 additions and 4 deletions.
6 changes: 6 additions & 0 deletions mysql-test/include/ctype_casefolding.inc
Expand Up @@ -21,6 +21,12 @@ INSERT INTO case_folding (code) VALUES
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;

# Capital/small letter pairs that the inline comments mark as Unicode-14.0
# additions. Both members of each pair are inserted so the output shows
# LOWER() and UPPER() applied to each of them.
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;

# Build the character for every row from its numeric code, read as ucs2.
UPDATE case_folding SET c=CHAR(code USING ucs2);
# Print the code next to the hex of LOWER(c) and UPPER(c) and the character itself.
# NOTE(review): there is no ORDER BY, so the row order is whatever the storage
# engine returns - confirm it is stable before relying on it in new result files.
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
DROP TABLE case_folding;
16 changes: 16 additions & 0 deletions mysql-test/include/ctype_casefolding_supplementary.inc
@@ -0,0 +1,16 @@
# Case folding checks for characters whose code points are above U+FFFF.
# NOTE(review): the file sets no character set or collation itself; it looks
# like the sourcing test is expected to issue SET NAMES first - confirm.
#
# CREATE ... SELECT ... LIMIT 0 only defines the columns; no row is inserted here.
CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c, SPACE(64) AS comment LIMIT 0;
# Record the resulting column definitions in the test output.
SHOW CREATE TABLE case_folding;

# One capital/small pair per script. The comment column stores the character
# name together with the Unicode version noted for it.
INSERT INTO case_folding (code, comment) VALUES (0x10595, 'VITHKUQI CAPITAL LETTER ZE (Unicode-14.0)');
INSERT INTO case_folding (code, comment) VALUES (0x105BC, 'VITHKUQI SMALL LETTER ZE (Unicode-14.0)');
INSERT INTO case_folding (code, comment) VALUES (0x1E921, 'ADLAM CAPITAL LETTER SHA (Unicode-9.0)');
INSERT INTO case_folding (code, comment) VALUES (0x1E943, 'ADLAM SMALL LETTER SHA (Unicode-9.0)');

# Build the character for every row from its numeric code, read as utf32.
UPDATE case_folding SET c=CHAR(code USING utf32);
# Show the character, its LOWER() form and its UPPER() form as utf32 hex.
# Rows are ordered by the binary value of c.
SELECT
HEX(CONVERT(c USING utf32)) AS ch,
HEX(CONVERT(LOWER(c) USING utf32)) AS cl,
HEX(CONVERT(UPPER(c) USING utf32)) AS cu,
comment
FROM case_folding ORDER BY BINARY(c);
DROP TABLE case_folding;
47 changes: 47 additions & 0 deletions mysql-test/main/ctype_ldml.result
Expand Up @@ -3074,6 +3074,11 @@ INSERT INTO case_folding (code) VALUES
(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */,
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;
UPDATE case_folding SET c=CHAR(code USING ucs2);
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
Expand All @@ -3091,6 +3096,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
69 69 49 i
130 69 C4B0 İ
131 C4B1 49 ı
2C2F E2B0AF E2B0AF Ⱟ
2C5F E2B19F E2B19F ⱟ
A7C0 EA9F80 EA9F80 Ꟁ
A7C1 EA9F81 EA9F81 ꟁ
DROP TABLE case_folding;
#
# End of 10.3 tests
Expand Down Expand Up @@ -3267,6 +3276,11 @@ INSERT INTO case_folding (code) VALUES
(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */,
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;
UPDATE case_folding SET c=CHAR(code USING ucs2);
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
Expand All @@ -3284,6 +3298,39 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
69 69 49 i
130 69 C4B0 İ
131 C4B1 49 ı
2C2F E2B19F E2B0AF Ⱟ
2C5F E2B19F E2B0AF ⱟ
A7C0 EA9F81 EA9F80 Ꟁ
A7C1 EA9F81 EA9F80 ꟁ
DROP TABLE case_folding;
#
# MDEV-30577 Case folding for uca1400 collations is not up to date
#
SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci;
CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c, SPACE(64) AS comment LIMIT 0;
SHOW CREATE TABLE case_folding;
Table Create Table
case_folding CREATE TABLE `case_folding` (
`code` int(1) NOT NULL,
`c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci DEFAULT NULL,
`comment` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO case_folding (code, comment) VALUES (0x10595, 'VITHKUQI CAPITAL LETTER ZE (Unicode-14.0)');
INSERT INTO case_folding (code, comment) VALUES (0x105BC, 'VITHKUQI SMALL LETTER ZE (Unicode-14.0)');
INSERT INTO case_folding (code, comment) VALUES (0x1E921, 'ADLAM CAPITAL LETTER SHA (Unicode-9.0)');
INSERT INTO case_folding (code, comment) VALUES (0x1E943, 'ADLAM SMALL LETTER SHA (Unicode-9.0)');
UPDATE case_folding SET c=CHAR(code USING utf32);
SELECT
HEX(CONVERT(c USING utf32)) AS ch,
HEX(CONVERT(LOWER(c) USING utf32)) AS cl,
HEX(CONVERT(UPPER(c) USING utf32)) AS cu,
comment
FROM case_folding ORDER BY BINARY(c);
ch cl cu comment
00010595 000105BC 00010595 VITHKUQI CAPITAL LETTER ZE (Unicode-14.0)
000105BC 000105BC 00010595 VITHKUQI SMALL LETTER ZE (Unicode-14.0)
0001E921 0001E943 0001E921 ADLAM CAPITAL LETTER SHA (Unicode-9.0)
0001E943 0001E943 0001E921 ADLAM SMALL LETTER SHA (Unicode-9.0)
DROP TABLE case_folding;
#
# End of 10.10 tests
Expand Down
7 changes: 7 additions & 0 deletions mysql-test/main/ctype_ldml.test
Expand Up @@ -704,6 +704,13 @@ DROP TABLE t1;
SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci;
--source include/ctype_casefolding.inc

--echo #
--echo # MDEV-30577 Case folding for uca1400 collations is not up to date
--echo #

# Select the uca1400 test collation for the connection, then run the shared
# supplementary-character case folding include file under it.
SET NAMES utf8mb4 COLLATE utf8mb4_uca1400_test01_as_ci;
--source include/ctype_casefolding_supplementary.inc

--echo #
--echo # End of 10.10 tests
--echo #
63 changes: 63 additions & 0 deletions mysql-test/main/ctype_utf8_uca.result
Expand Up @@ -618,6 +618,11 @@ INSERT INTO case_folding (code) VALUES
(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */,
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;
UPDATE case_folding SET c=CHAR(code USING ucs2);
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
Expand All @@ -635,6 +640,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
69 69 49 i
130 69 C4B0 İ
131 C4B1 49 ı
2C2F E2B0AF E2B0AF Ⱟ
2C5F E2B19F E2B19F ⱟ
A7C0 EA9F80 EA9F80 Ꟁ
A7C1 EA9F81 EA9F81 ꟁ
DROP TABLE case_folding;
SET NAMES utf8mb3 COLLATE utf8mb3_turkish_ci /*Unicode-4.0 folding, with Turkish mapping for I */;
CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0;
Expand All @@ -661,6 +670,11 @@ INSERT INTO case_folding (code) VALUES
(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */,
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;
UPDATE case_folding SET c=CHAR(code USING ucs2);
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
Expand All @@ -678,6 +692,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
69 69 C4B0 i
130 69 C4B0 İ
131 C4B1 49 ı
2C2F E2B0AF E2B0AF Ⱟ
2C5F E2B19F E2B19F ⱟ
A7C0 EA9F80 EA9F80 Ꟁ
A7C1 EA9F81 EA9F81 ꟁ
DROP TABLE case_folding;
SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_ci;
CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0;
Expand All @@ -704,6 +722,11 @@ INSERT INTO case_folding (code) VALUES
(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */,
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;
UPDATE case_folding SET c=CHAR(code USING ucs2);
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
Expand All @@ -721,6 +744,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
69 69 49 i
130 69 C4B0 İ
131 C4B1 49 ı
2C2F E2B0AF E2B0AF Ⱟ
2C5F E2B19F E2B19F ⱟ
A7C0 EA9F80 EA9F80 Ꟁ
A7C1 EA9F81 EA9F81 ꟁ
DROP TABLE case_folding;
SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_nopad_ci;
CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0;
Expand All @@ -747,6 +774,11 @@ INSERT INTO case_folding (code) VALUES
(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */,
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;
UPDATE case_folding SET c=CHAR(code USING ucs2);
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
Expand All @@ -764,6 +796,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
69 69 49 i
130 69 C4B0 İ
131 C4B1 49 ı
2C2F E2B0AF E2B0AF Ⱟ
2C5F E2B19F E2B19F ⱟ
A7C0 EA9F80 EA9F80 Ꟁ
A7C1 EA9F81 EA9F81 ꟁ
DROP TABLE case_folding;
SET NAMES utf8mb3 COLLATE utf8mb3_myanmar_ci;
CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0;
Expand All @@ -790,6 +826,11 @@ INSERT INTO case_folding (code) VALUES
(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */,
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;
UPDATE case_folding SET c=CHAR(code USING ucs2);
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
Expand All @@ -807,6 +848,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
69 69 49 i
130 69 C4B0 İ
131 C4B1 49 ı
2C2F E2B0AF E2B0AF Ⱟ
2C5F E2B19F E2B19F ⱟ
A7C0 EA9F80 EA9F80 Ꟁ
A7C1 EA9F81 EA9F81 ꟁ
DROP TABLE case_folding;
SET NAMES utf8mb3 COLLATE utf8mb3_thai_520_w2;
CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0;
Expand All @@ -833,6 +878,11 @@ INSERT INTO case_folding (code) VALUES
(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */,
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;
UPDATE case_folding SET c=CHAR(code USING ucs2);
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
Expand All @@ -850,6 +900,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
69 69 49 i
130 69 C4B0 İ
131 C4B1 49 ı
2C2F E2B0AF E2B0AF Ⱟ
2C5F E2B19F E2B19F ⱟ
A7C0 EA9F80 EA9F80 Ꟁ
A7C1 EA9F81 EA9F81 ꟁ
DROP TABLE case_folding;
#
# End of 10.3 tests
Expand Down Expand Up @@ -1805,6 +1859,11 @@ INSERT INTO case_folding (code) VALUES
(0x0130) /* LATIN CAPITAL LETTER I WITH DOT ABOVE */,
(0x0131) /* LATIN SMALL LETTER DOTLESS I */
;
INSERT INTO case_folding (code) VALUES
(0x2C2F) /* GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0x2C5F) /* GLAGOLITIC SMALL LETTER CAUDATE CHRIVI (Unicode-14.0) */,
(0xA7C0) /* LATIN CAPITAL LETTER OLD POLISH O (Unicode-14.0) */,
(0xA7C1) /* LATIN SMALL LETTER OLD POLISH O (Unicode-14.0) */;
UPDATE case_folding SET c=CHAR(code USING ucs2);
SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding;
HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
Expand All @@ -1822,6 +1881,10 @@ HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c
69 69 49 i
130 69 C4B0 İ
131 C4B1 49 ı
2C2F E2B19F E2B0AF Ⱟ
2C5F E2B19F E2B0AF ⱟ
A7C0 EA9F81 EA9F80 Ꟁ
A7C1 EA9F81 EA9F80 ꟁ
DROP TABLE case_folding;
#
# End of 10.10 tests
Expand Down

0 comments on commit c21745d

Please sign in to comment.