Skip to content

Commit 2ad287c

Browse files
committed
MDEV-31069 Reuse duplicate char-to-weight conversion code in ctype-utf8.c and ctype-ucs2.c
Removing similar functions from ctype-utf8.c and ctype-ucs2.c:
- my_tosort_utf16()
- my_tosort_utf32()
- my_tosort_ucs2()
- my_tosort_unicode()

Adding new shared functions into ctype-unidata.h:
- my_tosort_unicode_bmp() - reused for utf8mb3, ucs2
- my_tosort_unicode() - reused for utf8mb4, utf16, utf32

For simplicity, the new version of my_tosort_unicode*() does not include the code handling the MY_CS_LOWER_SORT flag because:
- it affects performance negatively
- we don't have any collations with this flag yet anyway

(This code was most likely earlier erroneously merged from MySQL's utf8_tolower_ci at some point.)
1 parent 30b4bb4 commit 2ad287c

File tree

3 files changed

+39
-70
lines changed

3 files changed

+39
-70
lines changed

strings/ctype-ucs2.c

Lines changed: 3 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1284,22 +1284,6 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
12841284
const char charset_name_utf16le[]= "utf16le";
12851285
#define charset_name_utf16le_length (sizeof(charset_name_utf16le)-1)
12861286

1287-
static inline void
1288-
my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1289-
{
1290-
if (*wc <= uni_plane->maxchar)
1291-
{
1292-
MY_UNICASE_CHARACTER *page;
1293-
if ((page= uni_plane->page[*wc >> 8]))
1294-
*wc= page[*wc & 0xFF].sort;
1295-
}
1296-
else
1297-
{
1298-
*wc= MY_CS_REPLACEMENT_CHARACTER;
1299-
}
1300-
}
1301-
1302-
13031287

13041288
static size_t
13051289
my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
@@ -1341,7 +1325,7 @@ my_hash_sort_utf16_nopad(CHARSET_INFO *cs,
13411325

13421326
while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
13431327
{
1344-
my_tosort_utf16(uni_plane, &wc);
1328+
my_tosort_unicode(uni_plane, &wc);
13451329
MY_HASH_ADD_16(m1, m2, wc);
13461330
s+= res;
13471331
}
@@ -2178,22 +2162,6 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
21782162
}
21792163

21802164

2181-
static inline void
2182-
my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2183-
{
2184-
if (*wc <= uni_plane->maxchar)
2185-
{
2186-
MY_UNICASE_CHARACTER *page;
2187-
if ((page= uni_plane->page[*wc >> 8]))
2188-
*wc= page[*wc & 0xFF].sort;
2189-
}
2190-
else
2191-
{
2192-
*wc= MY_CS_REPLACEMENT_CHARACTER;
2193-
}
2194-
}
2195-
2196-
21972165
static size_t
21982166
my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)),
21992167
const char *ptr, size_t length)
@@ -2242,7 +2210,7 @@ my_hash_sort_utf32_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
22422210

22432211
while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
22442212
{
2245-
my_tosort_utf32(uni_plane, &wc);
2213+
my_tosort_unicode(uni_plane, &wc);
22462214
MY_HASH_ADD(m1, m2, (uint) (wc >> 24));
22472215
MY_HASH_ADD(m1, m2, (uint) (wc >> 16) & 0xFF);
22482216
MY_HASH_ADD(m1, m2, (uint) (wc >> 8) & 0xFF);
@@ -3082,14 +3050,6 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
30823050
}
30833051

30843052

3085-
static inline void
3086-
my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
3087-
{
3088-
MY_UNICASE_CHARACTER *page;
3089-
if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
3090-
*wc= page[*wc & 0xFF].sort;
3091-
}
3092-
30933053
static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
30943054
char *dst, size_t dstlen)
30953055
{
@@ -3125,7 +3085,7 @@ my_hash_sort_ucs2_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
31253085

31263086
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
31273087
{
3128-
my_tosort_ucs2(uni_plane, &wc);
3088+
my_tosort_unicode_bmp(uni_plane, &wc);
31293089
MY_HASH_ADD_16(m1, m2, wc);
31303090
s+=res;
31313091
}

strings/ctype-unidata.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,32 @@ static inline my_wc_t my_u300_toupper_7bit(uchar ch)
3636
}
3737

3838

39+
static inline void my_tosort_unicode_bmp(MY_UNICASE_INFO *uni_plane,
40+
my_wc_t *wc)
41+
{
42+
const MY_UNICASE_CHARACTER *page;
43+
DBUG_ASSERT(*wc <= uni_plane->maxchar);
44+
if ((page= uni_plane->page[*wc >> 8]))
45+
*wc= page[*wc & 0xFF].sort;
46+
}
47+
48+
49+
static inline void my_tosort_unicode(MY_UNICASE_INFO *uni_plane,
50+
my_wc_t *wc)
51+
{
52+
if (*wc <= uni_plane->maxchar)
53+
{
54+
const MY_UNICASE_CHARACTER *page;
55+
if ((page= uni_plane->page[*wc >> 8]))
56+
*wc= page[*wc & 0xFF].sort;
57+
}
58+
else
59+
{
60+
*wc= MY_CS_REPLACEMENT_CHARACTER;
61+
}
62+
}
63+
64+
3965
static inline void
4066
my_tolower_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
4167
{

strings/ctype-utf8.c

Lines changed: 10 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -4638,23 +4638,6 @@ MY_UNICASE_INFO my_unicase_unicode520=
46384638
};
46394639

46404640

4641-
static inline void
4642-
my_tosort_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc, uint flags)
4643-
{
4644-
if (*wc <= uni_plane->maxchar)
4645-
{
4646-
MY_UNICASE_CHARACTER *page;
4647-
if ((page= uni_plane->page[*wc >> 8]))
4648-
*wc= (flags & MY_CS_LOWER_SORT) ?
4649-
page[*wc & 0xFF].tolower :
4650-
page[*wc & 0xFF].sort;
4651-
}
4652-
else
4653-
{
4654-
*wc= MY_CS_REPLACEMENT_CHARACTER;
4655-
}
4656-
}
4657-
46584641

46594642
static uint
46604643
my_casefold_multiply_utf8mbx(CHARSET_INFO *cs)
@@ -4734,8 +4717,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
47344717
{
47354718
if (weights)
47364719
{
4737-
my_tosort_unicode(weights, &s_wc, cs->state);
4738-
my_tosort_unicode(weights, &w_wc, cs->state);
4720+
my_tosort_unicode(weights, &s_wc);
4721+
my_tosort_unicode(weights, &w_wc);
47394722
}
47404723
if (s_wc != w_wc)
47414724
return 1; /* No match */
@@ -4803,8 +4786,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
48034786
return 1;
48044787
if (weights)
48054788
{
4806-
my_tosort_unicode(weights, &s_wc, cs->state);
4807-
my_tosort_unicode(weights, &w_wc, cs->state);
4789+
my_tosort_unicode(weights, &s_wc);
4790+
my_tosort_unicode(weights, &w_wc);
48084791
}
48094792

48104793
if (s_wc == w_wc)
@@ -5242,7 +5225,7 @@ static void my_hash_sort_utf8mb3_nopad(CHARSET_INFO *cs, const uchar *s, size_t
52425225

52435226
while ((s < e) && (res=my_utf8mb3_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
52445227
{
5245-
my_tosort_unicode(uni_plane, &wc, cs->state);
5228+
my_tosort_unicode(uni_plane, &wc);
52465229
MY_HASH_ADD_16(m1, m2, wc);
52475230
s+= res;
52485231
}
@@ -5976,8 +5959,8 @@ static int my_strnncoll_utf8mb3_cs(CHARSET_INFO *cs,
59765959
save_diff = ((int)s_wc) - ((int)t_wc);
59775960
}
59785961

5979-
my_tosort_unicode(uni_plane, &s_wc, cs->state);
5980-
my_tosort_unicode(uni_plane, &t_wc, cs->state);
5962+
my_tosort_unicode(uni_plane, &s_wc);
5963+
my_tosort_unicode(uni_plane, &t_wc);
59815964

59825965
if ( s_wc != t_wc )
59835966
{
@@ -6018,8 +6001,8 @@ static int my_strnncollsp_utf8mb3_cs(CHARSET_INFO *cs,
60186001
save_diff = ((int)s_wc) - ((int)t_wc);
60196002
}
60206003

6021-
my_tosort_unicode(uni_plane, &s_wc, cs->state);
6022-
my_tosort_unicode(uni_plane, &t_wc, cs->state);
6004+
my_tosort_unicode(uni_plane, &s_wc);
6005+
my_tosort_unicode(uni_plane, &t_wc);
60236006

60246007
if ( s_wc != t_wc )
60256008
{
@@ -7697,7 +7680,7 @@ my_hash_sort_utf8mb4_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
76977680

76987681
while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
76997682
{
7700-
my_tosort_unicode(uni_plane, &wc, cs->state);
7683+
my_tosort_unicode(uni_plane, &wc);
77017684
MY_HASH_ADD_16(m1, m2, (uint) (wc & 0xFFFF));
77027685
if (wc > 0xFFFF)
77037686
{

0 commit comments

Comments
 (0)