Skip to content

Commit

Permalink
MDEV-10743 LDML: a new syntax to reuse sort order from another 8bit s…
Browse files Browse the repository at this point in the history
…imple collation
  • Loading branch information
Alexander Barkov committed Sep 6, 2016
1 parent 8ae6592 commit e4f6fd5
Show file tree
Hide file tree
Showing 10 changed files with 325 additions and 29 deletions.
46 changes: 46 additions & 0 deletions mysql-test/r/ctype_ldml.result
Original file line number Diff line number Diff line change
Expand Up @@ -454,8 +454,12 @@ select "foo" = "foo " collate latin1_test;
The following tests check that two-byte collation IDs work
select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
ascii2_bin2 ascii2 319 1
ascii2_general_ci ascii2 320 Yes 1
ascii2_bin ascii2 321 1
ascii2_general_inherited_ci ascii2 322 1
ascii2_general_inherited2_ci ascii2 323 1
ascii2_badly_inherited_ci ascii2 324 1
utf8mb4_test_ci utf8mb4 326 8
utf16_test_ci utf16 327 8
utf8mb4_test_400_ci utf8mb4 328 8
Expand Down Expand Up @@ -1185,20 +1189,62 @@ ch w ducet
DROP TABLE t1;
#
# Testing that the MY_CS_PUREASCII flag is set properly
# Comparison between ascii2 and latin1 should not give "illegal collation error"
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2, b VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES ('a','a'),('b','b');
SELECT * FROM t1 WHERE a=b;
a b
a a
b b
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin2;
SELECT * FROM t1 WHERE a=b;
a b
a a
b b
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin;
SELECT * FROM t1 WHERE a=b;
a b
a a
b b
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_general_inherited_ci;
SELECT * FROM t1 WHERE a=b;
a b
a a
b b
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_general_inherited2_ci;
SELECT * FROM t1 WHERE a=b;
a b
a a
b b
DROP TABLE t1;
#
# Testing that in case of two binary collations
# "BINARY" in a column definition uses the collation with the least id
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2 BINARY);
INSERT INTO t1 VALUES ('test');
SELECT COLLATION(a) FROM t1;
COLLATION(a)
ascii2_bin2
DROP TABLE t1;
#
# Testing mixing of two binary collations of the same character set
#
CREATE TABLE t1 (
a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin,
b VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin2
);
INSERT INTO t1 VALUES ('a','a');
SELECT * FROM t1 WHERE a=b;
ERROR HY000: Illegal mix of collations (ascii2_bin,IMPLICIT) and (ascii2_bin2,IMPLICIT) for operation '='
DROP TABLE t1;
#
# Testing bad collation inheritance
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_badly_inherited_ci);
ERROR HY000: Unknown collation: 'ascii2_badly_inherited_ci'
#
# Testing that the MY_CS_CSSORT flag is set properly
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test);
Expand Down
21 changes: 21 additions & 0 deletions mysql-test/std_data/ldml/Index.xml
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,29 @@
</charset>

<charset name="ascii2">
<!--
Notes:
- ascii2 has two collations with "binary" flag.
ctype_ldml.test makes sure that
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2 BINARY);
uses ascii2_bin2, which is the collation with the least ID.
- ascii2_general_inherited_ci inherits sort order in ascii2.xml
- ascii2_general_inherited2_ci inherits sort order directly in this file.
-->
<collation name="ascii2_bin2" id="319" flag="binary"/>
<collation name="ascii2_general_ci" id="320" flag="primary"/>
<collation name="ascii2_bin" id="321" flag="binary"/>
<collation name="ascii2_general_inherited_ci" id="322"/>
<collation name="ascii2_general_inherited2_ci" id="323">
<rules>
<import source="ascii2_general_ci"/>
</rules>
</collation>
<collation name="ascii2_badly_inherited_ci" id="324">
<rules>
<import source="ascii2_non_existing_ci"/>
</rules>
</collation>
</charset>

<charset name="latin1">
Expand Down
6 changes: 6 additions & 0 deletions mysql-test/std_data/ldml/ascii2.xml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@

<collation name="ascii2_bin" flag="binary"/>

<collation name="ascii2_general_inherited_ci">
<rules>
<import source="ascii2_general_ci"/>
</rules>
</collation>

</charset>

</charsets>
4 changes: 4 additions & 0 deletions mysql-test/suite/innodb/r/innodb_ctype_ldml.result
Original file line number Diff line number Diff line change
Expand Up @@ -390,8 +390,12 @@ select "foo" = "foo " collate latin1_test;
The following tests check that two-byte collation IDs work
select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
ascii2_bin2 ascii2 319 1
ascii2_general_ci ascii2 320 Yes 1
ascii2_bin ascii2 321 1
ascii2_general_inherited_ci ascii2 322 1
ascii2_general_inherited2_ci ascii2 323 1
ascii2_badly_inherited_ci ascii2 324 1
utf8mb4_test_ci utf8mb4 326 8
utf16_test_ci utf16 327 8
utf8mb4_test_400_ci utf8mb4 328 8
Expand Down
36 changes: 34 additions & 2 deletions mysql-test/t/ctype_ldml.test
Original file line number Diff line number Diff line change
Expand Up @@ -413,16 +413,48 @@ DROP TABLE t1;

--echo #
--echo # Testing that the MY_CS_PUREASCII flag is set properly
--echo # Comparison between ascii2 and latin1 should not give "illegal collation error"
--echo #
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2, b VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES ('a','a'),('b','b');
# should not give "illegal collation" error
SELECT * FROM t1 WHERE a=b;
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin2;
SELECT * FROM t1 WHERE a=b;
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin;
# should not give "illegal collation" error
SELECT * FROM t1 WHERE a=b;
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_general_inherited_ci;
SELECT * FROM t1 WHERE a=b;
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_general_inherited2_ci;
SELECT * FROM t1 WHERE a=b;
DROP TABLE t1;

--echo #
--echo # Testing that in case of two binary collations
--echo # "BINARY" in a column definition uses the collation with the least id
--echo #
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2 BINARY);
INSERT INTO t1 VALUES ('test');
SELECT COLLATION(a) FROM t1;
DROP TABLE t1;


--echo #
--echo # Testing mixing of two binary collations of the same character set
--echo #
CREATE TABLE t1 (
a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin,
b VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin2
);
INSERT INTO t1 VALUES ('a','a');
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT * FROM t1 WHERE a=b;
DROP TABLE t1;

--echo #
--echo # Testing bad collation inheritance
--echo #
--error ER_UNKNOWN_COLLATION
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_badly_inherited_ci);

--echo #
--echo # Testing that the MY_CS_CSSORT flag is set properly
Expand Down
98 changes: 93 additions & 5 deletions mysys/charset.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,13 +197,55 @@ static int cs_copy_data(struct charset_info_st *to, CHARSET_INFO *from)
}


/**
  Tell whether a simple 8bit character set has all of its character set
  level arrays: ctype, to_upper, to_lower and tab_to_uni.

  @param cs  Character set / collation descriptor to examine.
  @return    Non-zero if all four arrays are set, zero otherwise.
*/
static my_bool simple_8bit_charset_data_is_full(CHARSET_INFO *cs)
{
  if (!cs->ctype || !cs->to_upper || !cs->to_lower)
    return 0;
  return cs->tab_to_uni ? 1 : 0;
}


/**
  Fill in the 8bit character set arrays that are missing in cs,
  borrowing them from refcs. Arrays that cs already has are not touched.

  Only the pointers are copied, so the arrays pointed to by refcs must
  already be in permanent memory, e.g. static memory,
  or memory allocated by my_once_xxx().

  @param cs     Destination descriptor; its NULL array pointers get filled.
  @param refcs  Source descriptor to take the arrays from.
*/
static void
inherit_charset_data(struct charset_info_st *cs, CHARSET_INFO *refcs)
{
  if (!cs->ctype)
  {
    cs->ctype= refcs->ctype;
  }
  if (!cs->to_lower)
  {
    cs->to_lower= refcs->to_lower;
  }
  if (!cs->to_upper)
  {
    cs->to_upper= refcs->to_upper;
  }
  if (!cs->tab_to_uni)
  {
    cs->tab_to_uni= refcs->tab_to_uni;
  }
}


/**
  Tell whether a simple 8bit collation has what it needs for sorting:
  either an explicit sort_order array, or the MY_CS_BINSORT state flag.

  @param cs  Collation descriptor to examine.
  @return    Non-zero if sort_order is set or MY_CS_BINSORT is on,
             zero otherwise.
*/
static my_bool simple_8bit_collation_data_is_full(CHARSET_INFO *cs)
{
  if (cs->sort_order)
    return 1;
  return (cs->state & MY_CS_BINSORT) ? 1 : 0;
}


/**
  Borrow the sort order of a simple 8bit collation from another collation.
  Nothing is done when cs already has complete collation data
  (its own sort_order, or the MY_CS_BINSORT flag).

  Only the pointer is copied, so refcs->sort_order must already be
  in permanent memory, e.g. static memory,
  or memory allocated by my_once_xxx().

  @param cs     Destination descriptor.
  @param refcs  Collation to take sort_order from.
*/
static void
inherit_collation_data(struct charset_info_st *cs, CHARSET_INFO *refcs)
{
  if (simple_8bit_collation_data_is_full(cs))
    return;
  cs->sort_order= refcs->sort_order;
}


/**
  Tell whether the definition of a simple 8bit collation is complete.

  A definition is complete when:
  - it has an ID (number), a character set name and a collation name,
  - all character set level arrays are present, and
  - it either has complete collation data (sort_order or MY_CS_BINSORT),
    or it has a tailoring. A collation with a tailoring but without its own
    sort_order is accepted here; find_collation_data_inheritance_source()
    later parses an "[import ...]" tailoring to locate the collation
    whose sort order is reused.

  @param cs  Collation descriptor to examine.
  @return    Non-zero if the definition is complete, zero otherwise.
*/
static my_bool simple_cs_is_full(CHARSET_INFO *cs)
{
  return cs->number && cs->csname && cs->name &&
         simple_8bit_charset_data_is_full(cs) &&
         (simple_8bit_collation_data_is_full(cs) || cs->tailoring);
}


Expand Down Expand Up @@ -336,7 +378,7 @@ static int add_collation(struct charset_info_st *cs)
cs->name= NULL;
cs->state= 0;
cs->sort_order= NULL;
cs->state= 0;
cs->tailoring= NULL;
}
return MY_XML_OK;
}
Expand Down Expand Up @@ -631,6 +673,39 @@ const char *get_charset_name(uint charset_number)
}


/**
  Look up a collation to inherit data from, by its ID.

  @param cs     The collation that wants to inherit; it is never returned
                as its own source.
  @param refid  ID of the candidate source, 0 meaning "not found".
  @return       The entry of all_charsets[] with the given ID if it exists
                and has the MY_CS_AVAILABLE flag, NULL otherwise.
*/
static CHARSET_INFO *inheritance_source_by_id(CHARSET_INFO *cs, uint refid)
{
  CHARSET_INFO *candidate;

  /* No source, or the collation refers to itself */
  if (!refid || refid == cs->number)
    return NULL;

  candidate= all_charsets[refid];
  if (!candidate || !(candidate->state & MY_CS_AVAILABLE))
    return NULL;

  return candidate;
}


/**
  Find the collation whose sort order should be reused by cs.

  The source is named in the tailoring, which must start with
  "[import " followed by the collation name and a closing ']'.
  The name must be shorter than MY_CS_NAME_SIZE bytes.

  @param cs  The collation that wants to inherit collation data.
  @return    The source collation, or NULL if cs has no tailoring,
             the tailoring is not of the expected form, the name is too
             long, or the named collation cannot be used as a source.
*/
static CHARSET_INFO *find_collation_data_inheritance_source(CHARSET_INFO *cs)
{
  const char *beg, *end;
  char name[MY_CS_NAME_SIZE + 1];

  if (!cs->tailoring || strncmp(cs->tailoring, "[import ", 8))
    return NULL;

  beg= cs->tailoring + 8;
  if (!(end= strchr(beg, ']')))
    return NULL;

  /*
    Compare lengths rather than computing beg + MY_CS_NAME_SIZE:
    that pointer could lie beyond the end of a short tailoring string,
    which is undefined behaviour.
  */
  if (end - beg >= MY_CS_NAME_SIZE)
    return NULL;

  memcpy(name, beg, end - beg);
  name[end - beg]= '\0';
  return inheritance_source_by_id(cs, get_collation_number(name));
}


/**
  Find the collation to inherit character set level arrays from:
  the one returned by get_charset_number_internal() for the character set
  name of cs with the MY_CS_PRIMARY flag.

  @param cs  The collation that wants to inherit character set data.
  @return    The source collation, or NULL if inheritance_source_by_id()
             rejects the looked-up ID.
*/
static CHARSET_INFO *find_charset_data_inheritance_source(CHARSET_INFO *cs)
{
  return inheritance_source_by_id(cs,
                                  get_charset_number_internal(cs->csname,
                                                              MY_CS_PRIMARY));
}


static CHARSET_INFO *
get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags)
{
Expand Down Expand Up @@ -665,6 +740,19 @@ get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags)
{
if (!(cs->state & MY_CS_READY))
{
if (!simple_8bit_charset_data_is_full(cs))
{
CHARSET_INFO *refcs= find_charset_data_inheritance_source(cs);
if (refcs)
inherit_charset_data(cs, refcs);
}
if (!simple_8bit_collation_data_is_full(cs))
{
CHARSET_INFO *refcl= find_collation_data_inheritance_source(cs);
if (refcl)
inherit_collation_data(cs, refcl);
}

if ((cs->cset->init && cs->cset->init(cs, loader)) ||
(cs->coll->init && cs->coll->init(cs, loader)))
{
Expand Down
3 changes: 3 additions & 0 deletions sql/item.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2078,6 +2078,9 @@ bool DTCollation::aggregate(const DTCollation &dt, uint flags)
set(0, DERIVATION_NONE, 0);
return 1;
}
if (collation->state & MY_CS_BINSORT &&
dt.collation->state & MY_CS_BINSORT)
return 1;
if (collation->state & MY_CS_BINSORT)
return 0;
if (dt.collation->state & MY_CS_BINSORT)
Expand Down
Loading

0 comments on commit e4f6fd5

Please sign in to comment.