Skip to content

Commit ee19806

Browse files
author
Alexander Barkov
committed
MDEV-9711 NO PAD collations
Based on the patch from Daniil Medvedev (a Google Summer of Code task)
1 parent e4f6fd5 commit ee19806

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+28253
-345
lines changed

include/m_ctype.h

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,8 @@ struct my_collation_handler_st
362362

/*
  Externally defined collation handler objects.
  The two *_nopad_* entries are the ones added in this hunk (gutter lines
  marked "+"); by name they parallel the existing 8-bit bin and simple_ci
  handlers declared just above them.
*/
363363
extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
364364
extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
365+
extern MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler;
366+
extern MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler;
365367
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
366368

367369
/* Some typedef to make it easy for C++ to make function pointers */
@@ -585,49 +587,81 @@ struct charset_info_st
585587

/*
  Externally defined charset_info_st objects, one per built-in collation.
  Lines whose gutter number ends in "+" are added by this commit: for each
  character set a *_nopad_bin and a *_nopad_ci object is declared next to
  the existing *_bin / *_ci pair.
  Only the first group carries MYSQL_PLUGIN_IMPORT.
*/
586588
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_bin;
587589
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1;
590+
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_latin1_nopad;
588591
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_filename;
589592
extern MYSQL_PLUGIN_IMPORT struct charset_info_st my_charset_utf8_general_ci;
590593

591594
extern struct charset_info_st my_charset_big5_bin;
592595
extern struct charset_info_st my_charset_big5_chinese_ci;
596+
extern struct charset_info_st my_charset_big5_nopad_bin;
597+
extern struct charset_info_st my_charset_big5_chinese_nopad_ci;
593598
extern struct charset_info_st my_charset_cp1250_czech_ci;
594599
extern struct charset_info_st my_charset_cp932_bin;
595600
extern struct charset_info_st my_charset_cp932_japanese_ci;
601+
extern struct charset_info_st my_charset_cp932_nopad_bin;
602+
extern struct charset_info_st my_charset_cp932_japanese_nopad_ci;
596603
extern struct charset_info_st my_charset_eucjpms_bin;
597604
extern struct charset_info_st my_charset_eucjpms_japanese_ci;
605+
extern struct charset_info_st my_charset_eucjpms_nopad_bin;
606+
extern struct charset_info_st my_charset_eucjpms_japanese_nopad_ci;
598607
extern struct charset_info_st my_charset_euckr_bin;
599608
extern struct charset_info_st my_charset_euckr_korean_ci;
609+
extern struct charset_info_st my_charset_euckr_nopad_bin;
610+
extern struct charset_info_st my_charset_euckr_korean_nopad_ci;
600611
extern struct charset_info_st my_charset_gb2312_bin;
601612
extern struct charset_info_st my_charset_gb2312_chinese_ci;
613+
extern struct charset_info_st my_charset_gb2312_nopad_bin;
614+
extern struct charset_info_st my_charset_gb2312_chinese_nopad_ci;
602615
extern struct charset_info_st my_charset_gbk_bin;
603616
extern struct charset_info_st my_charset_gbk_chinese_ci;
617+
extern struct charset_info_st my_charset_gbk_nopad_bin;
618+
extern struct charset_info_st my_charset_gbk_chinese_nopad_ci;
604619
extern struct charset_info_st my_charset_latin1_bin;
620+
extern struct charset_info_st my_charset_latin1_nopad_bin;
605621
extern struct charset_info_st my_charset_latin1_german2_ci;
606622
extern struct charset_info_st my_charset_latin2_czech_ci;
607623
extern struct charset_info_st my_charset_sjis_bin;
608624
extern struct charset_info_st my_charset_sjis_japanese_ci;
625+
extern struct charset_info_st my_charset_sjis_nopad_bin;
626+
extern struct charset_info_st my_charset_sjis_japanese_nopad_ci;
609627
extern struct charset_info_st my_charset_tis620_bin;
610628
extern struct charset_info_st my_charset_tis620_thai_ci;
629+
extern struct charset_info_st my_charset_tis620_nopad_bin;
630+
extern struct charset_info_st my_charset_tis620_thai_nopad_ci;
611631
extern struct charset_info_st my_charset_ucs2_bin;
612632
extern struct charset_info_st my_charset_ucs2_general_ci;
633+
extern struct charset_info_st my_charset_ucs2_nopad_bin;
634+
extern struct charset_info_st my_charset_ucs2_general_nopad_ci;
613635
extern struct charset_info_st my_charset_ucs2_general_mysql500_ci;
614636
extern struct charset_info_st my_charset_ucs2_unicode_ci;
615637
/* NOTE(review): repeats the my_charset_ucs2_general_mysql500_ci declaration
   two lines above. Both are unchanged context lines; the repetition is legal
   C but looks unintentional. */
extern struct charset_info_st my_charset_ucs2_general_mysql500_ci;
616638
extern struct charset_info_st my_charset_ujis_bin;
617639
extern struct charset_info_st my_charset_ujis_japanese_ci;
640+
extern struct charset_info_st my_charset_ujis_nopad_bin;
641+
extern struct charset_info_st my_charset_ujis_japanese_nopad_ci;
618642
extern struct charset_info_st my_charset_utf16_bin;
619643
extern struct charset_info_st my_charset_utf16_general_ci;
620644
extern struct charset_info_st my_charset_utf16_unicode_ci;
621645
extern struct charset_info_st my_charset_utf16le_bin;
622646
extern struct charset_info_st my_charset_utf16le_general_ci;
/* The utf16 and utf16le NO PAD objects are declared together here, after the
   utf16le pair, rather than directly after their own *_bin / *_ci lines. */
647+
extern struct charset_info_st my_charset_utf16_general_nopad_ci;
648+
extern struct charset_info_st my_charset_utf16_nopad_bin;
649+
extern struct charset_info_st my_charset_utf16le_nopad_bin;
650+
extern struct charset_info_st my_charset_utf16le_general_nopad_ci;
623651
extern struct charset_info_st my_charset_utf32_bin;
624652
extern struct charset_info_st my_charset_utf32_general_ci;
625653
extern struct charset_info_st my_charset_utf32_unicode_ci;
654+
extern struct charset_info_st my_charset_utf32_nopad_bin;
655+
extern struct charset_info_st my_charset_utf32_general_nopad_ci;
626656
extern struct charset_info_st my_charset_utf8_bin;
657+
extern struct charset_info_st my_charset_utf8_nopad_bin;
658+
extern struct charset_info_st my_charset_utf8_general_nopad_ci;
627659
extern struct charset_info_st my_charset_utf8_general_mysql500_ci;
628660
extern struct charset_info_st my_charset_utf8_unicode_ci;
629661
extern struct charset_info_st my_charset_utf8mb4_bin;
630662
extern struct charset_info_st my_charset_utf8mb4_general_ci;
663+
extern struct charset_info_st my_charset_utf8mb4_nopad_bin;
664+
extern struct charset_info_st my_charset_utf8mb4_general_nopad_ci;
631665
extern struct charset_info_st my_charset_utf8mb4_unicode_ci;
632666

633667
#define MY_UTF8MB3 "utf8"
@@ -653,6 +687,11 @@ extern int my_strnncollsp_simple(CHARSET_INFO *, const uchar *, size_t,
/*
  Hash-sort prototypes. All three take the collation, a key buffer with its
  length, and two ulong accumulators (nr1, nr2) that are passed by pointer;
  none returns a value.
  my_hash_sort_simple_nopad is the declaration added by this hunk; its
  parameter list is identical to my_hash_sort_simple.
*/
653687
extern void my_hash_sort_simple(CHARSET_INFO *cs,
654688
const uchar *key, size_t len,
655689
ulong *nr1, ulong *nr2);
690+
691+
extern void my_hash_sort_simple_nopad(CHARSET_INFO *cs,
692+
const uchar *key, size_t len,
693+
ulong *nr1, ulong *nr2);
694+
656695
extern void my_hash_sort_bin(CHARSET_INFO *cs,
657696
const uchar *key, size_t len, ulong *nr1,
658697
ulong *nr2);
@@ -824,18 +863,38 @@ int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
/*
  Multi-byte and Unicode hash-sort / strnxfrm prototypes.
  Each *_nopad* prototype added here (gutter lines marked "+") has exactly
  the same parameter list and return type as the existing function declared
  immediately before it:
    my_hash_sort_mb_bin           -> my_hash_sort_mb_nopad_bin
    my_strnxfrm_mb                -> my_strnxfrm_mb_nopad
    my_strnxfrm_unicode           -> my_strnxfrm_unicode_nopad
    my_strnxfrm_unicode_full_bin  -> my_strnxfrm_unicode_full_nopad_bin
  The strnxfrm family takes (cs, dst, dstlen, nweights, src, srclen, flags)
  and returns size_t.
*/
824863
void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
825864
const uchar *key, size_t len,ulong *nr1, ulong *nr2);
826865

866+
void my_hash_sort_mb_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
867+
const uchar *key, size_t len,
868+
ulong *nr1, ulong *nr2);
869+
827870
size_t my_strnxfrm_mb(CHARSET_INFO *,
828871
uchar *dst, size_t dstlen, uint nweights,
829872
const uchar *src, size_t srclen, uint flags);
830873

874+
size_t my_strnxfrm_mb_nopad(CHARSET_INFO *,
875+
uchar *dst, size_t dstlen, uint nweights,
876+
const uchar *src, size_t srclen, uint flags);
877+
831878
size_t my_strnxfrm_unicode(CHARSET_INFO *,
832879
uchar *dst, size_t dstlen, uint nweights,
833880
const uchar *src, size_t srclen, uint flags);
881+
882+
size_t my_strnxfrm_unicode_nopad(CHARSET_INFO *,
883+
uchar *dst, size_t dstlen, uint nweights,
884+
const uchar *src, size_t srclen, uint flags);
885+
834886
size_t my_strnxfrmlen_unicode(CHARSET_INFO *, size_t);
835887

/* The continuation lines of my_strnxfrm_unicode_full_bin appear twice below:
   the "-" lines are the old wrapping, the "+" lines the new one. The
   parameters themselves are unchanged; only the line breaks differ. */
836888
size_t my_strnxfrm_unicode_full_bin(CHARSET_INFO *,
837-
uchar *dst, size_t dstlen, uint nweights,
838-
const uchar *src, size_t srclen, uint flags);
889+
uchar *dst, size_t dstlen,
890+
uint nweights, const uchar *src,
891+
size_t srclen, uint flags);
892+
893+
size_t my_strnxfrm_unicode_full_nopad_bin(CHARSET_INFO *,
894+
uchar *dst, size_t dstlen,
895+
uint nweights, const uchar *src,
896+
size_t srclen, uint flags);
897+
839898
size_t my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *, size_t);
840899

841900
int my_wildcmp_unicode(CHARSET_INFO *cs,
@@ -873,6 +932,10 @@ void my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
/*
  my_strxfrm_pad_desc_and_reverse and the _nopad variant added by this hunk.
  Both take (cs, str, frmend, strend, nweights, flags, level) and return
  size_t; the two parameter lists are identical.
  NOTE(review): how the _nopad variant differs in behaviour is not visible
  in this header; see its definition.
*/
873932
size_t my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
874933
uchar *str, uchar *frmend, uchar *strend,
875934
uint nweights, uint flags, uint level);
935+
size_t my_strxfrm_pad_desc_and_reverse_nopad(CHARSET_INFO *cs,
936+
uchar *str, uchar *frmend,
937+
uchar *strend, uint nweights,
938+
uint flags, uint level);
876939

877940
const MY_CONTRACTIONS *my_charset_get_contractions(CHARSET_INFO *cs,
878941
int level);

mysql-test/include/ctype_pad.inc

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
# ctype_pad.inc: shared checks of how a collation compares strings that
# differ only in leading/trailing spaces.
# The including test must define two mysqltest variables before sourcing:
#   $coll      - collation under test; used for every table definition and
#                most COLLATE clauses below.
#   $coll_pad  - second collation, used only in the "Collation mix" section
#                (presumably the PAD SPACE counterpart of $coll; confirm
#                against the callers).
# NOTE(review): some literals below look identical (for example 'abc ' twice
# in one VALUES list, or both arguments of LEAST/GREATEST). Runs of spaces
# may have been collapsed in this rendering; verify the exact literals
# against the repository before relying on them.
1+
--echo #
2+
--echo # Start of ctype_pad.inc
3+
--echo #
4+
5+
--echo #
6+
--echo # Unique indexes
7+
--echo #
8+
# t1.a is the PRIMARY KEY, so the INSERT succeeds only if all listed values
# are distinct under $coll. The rows are then read three times: once as is,
# and twice with the primary index ignored (ascending and descending).
9+
eval CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) COLLATE $coll;
10+
SHOW CREATE TABLE t1;
11+
INSERT INTO t1 VALUES ('abc'),('abc '),(' a'),(' a '),('a ');
12+
SELECT HEX(a), a FROM t1 ORDER BY a;
13+
SELECT HEX(a), a FROM t1 IGNORE INDEX(PRIMARY) ORDER BY a;
14+
SELECT HEX(a), a FROM t1 IGNORE INDEX(PRIMARY) ORDER BY a DESC;
15+
16+
--echo #
17+
--echo # UNION
18+
--echo #
19+
# UNION (without ALL) drops rows that compare equal under $coll. The union is
# wrapped in a derived table so HEX(a) shows the exact bytes that survive.
20+
eval CREATE TABLE t2 (a VARCHAR(10)) COLLATE $coll;
21+
INSERT INTO t2 VALUES ('abc '),('abc '),(' a'),('a ');
22+
SELECT HEX(a),a FROM (SELECT * FROM t1 UNION SELECT * FROM t2 ORDER BY a) td;
23+
DROP TABLE t1;
24+
DROP TABLE t2;
25+
26+
--echo #
27+
--echo # DISTINCT, COUNT, MAX
28+
--echo #
29+
# This t1 has no key and stays alive until the end of the "Weights" section.
30+
eval CREATE TABLE t1 (a VARCHAR(10)) COLLATE $coll;
31+
INSERT INTO t1 VALUES ('a'),('a '),(' a'),(' a '),('a ');
32+
SELECT HEX(a), a FROM (SELECT DISTINCT a FROM t1 ORDER BY a) td;
# NOTE(review): ORDER BY on a one-row aggregate result looks redundant.
33+
SELECT COUNT(DISTINCT a) FROM t1 ORDER BY a;
34+
SELECT HEX(MAX(a)), MAX(a) FROM t1;
35+
36+
--echo #
37+
--echo # GROUP BY
38+
--echo #
39+
# Groups on column a; the listed values include a pair that differs only by a
# trailing space, so the group count depends on $coll.
40+
eval CREATE TABLE t2 (a VARCHAR(10), b int, c varchar(10)) COLLATE $coll;
41+
INSERT t2 values('ab', 12, 'cd'), ('ab', 2, 'ed'), ('aa', 20, 'er'), ('aa ', 0, 'er ');
42+
SELECT HEX(a), cnt FROM (SELECT a, COUNT(a) AS cnt FROM t2 GROUP BY a ORDER BY a) AS td;
43+
DROP TABLE t2;
44+
45+
--echo #
46+
--echo # Weights
47+
--echo #
48+
# Reads the t1 created in the "DISTINCT, COUNT, MAX" section, then drops it.
49+
SELECT HEX(WEIGHT_STRING(a AS CHAR(10))) FROM t1;
50+
DROP TABLE t1;
51+
52+
--echo #
53+
--echo # IF, CASE, LEAST
54+
--echo #
55+
# The IF/CASE expressions return 'pad' when 'abc' and 'abc ' compare equal
# under $coll, and 'nopad' otherwise.
56+
eval SELECT IF('abc' COLLATE $coll = 'abc ', 'pad', 'nopad');
57+
eval SELECT CASE 'abc' COLLATE $coll WHEN 'abc ' THEN 'pad' ELSE 'nopad' END;
58+
eval SELECT CASE WHEN 'abc' COLLATE $coll = 'abc ' THEN 'pad' ELSE 'nopad' END;
59+
eval SELECT HEX(LEAST('abc ' COLLATE $coll, 'abc '));
60+
eval SELECT HEX(GREATEST('abc ' COLLATE $coll, 'abc '));
61+
62+
--echo #
63+
--echo # Collation mix
64+
--echo #
65+
# The column starts out with $coll_pad and is later altered to $coll. The
# same three comparisons (no COLLATE clause, explicit $coll_pad, explicit
# $coll) are run before and after the ALTER.
66+
eval CREATE TABLE t1 (a VARCHAR(10)) COLLATE $coll_pad;
67+
INSERT INTO t1 VALUES ('a'),('a ');
68+
SELECT COUNT(*) FROM t1 WHERE a='a';
69+
eval SELECT COUNT(*) FROM t1 WHERE a='a' COLLATE $coll_pad;
70+
eval SELECT COUNT(*) FROM t1 WHERE a='a' COLLATE $coll;
71+
eval ALTER TABLE t1 MODIFY a VARCHAR(10) COLLATE $coll;
72+
SELECT COUNT(*) FROM t1 WHERE a='a';
73+
eval SELECT COUNT(*) FROM t1 WHERE a='a' COLLATE $coll_pad;
74+
eval SELECT COUNT(*) FROM t1 WHERE a='a' COLLATE $coll;
75+
DROP TABLE t1;
76+
77+
--echo #
78+
--echo # End of ctype_pad.inc
79+
--echo #
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
# Runs include/ctype_pad.inc twice, once with the session storage engine set
# to MyISAM and once with HEAP, then sets the engine back to Default.
# include/ctype_pad.inc uses $coll and $coll_pad in its eval statements, so
# the test that sources this file must define both beforehand.
1+
SET STORAGE_ENGINE=MyISAM;
2+
--source include/ctype_pad.inc
3+
4+
SET STORAGE_ENGINE=HEAP;
5+
--source include/ctype_pad.inc
6+
7+
SET STORAGE_ENGINE=Default;

0 commit comments

Comments
 (0)