Skip to content

Commit

Permalink
MDEV-22849 Reuse skip_trailing_space() in my_hash_sort_utf8mbX
Browse files Browse the repository at this point in the history
Replacing the slow loop in my_hash_sort_utf8mbX() with the fast
skip_trailing_space(), which consumes 8 bytes in one iteration
and is around 8 times faster on long data.

Also, renaming:
- my_hash_sort_utf8() to my_hash_sort_utf8mb3()
- my_hash_sort_utf8_nopad() to my_hash_sort_utf8mb3_nopad()
to make the merge to 10.5 easier (possibly automatic).
  • Loading branch information
abarkov committed Jun 10, 2020
1 parent 9027427 commit 9b9a354
Showing 1 changed file with 11 additions and 15 deletions.
26 changes: 11 additions & 15 deletions strings/ctype-utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -5159,8 +5159,8 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen,
}


static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
static void my_hash_sort_utf8mb3_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
{
my_wc_t wc;
int res;
Expand All @@ -5179,17 +5179,15 @@ static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t sle
}


static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
static void my_hash_sort_utf8mb3(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
{
const uchar *e= s+slen;
/*
Remove end space. We have to do this to be able to compare
'A ' and 'A' as identical
*/
while (e > s && e[-1] == ' ')
e--;
my_hash_sort_utf8_nopad(cs, s, e - s, nr1, nr2);
const uchar *e= skip_trailing_space(s, slen);
my_hash_sort_utf8mb3_nopad(cs, s, e - s, nr1, nr2);
}


Expand Down Expand Up @@ -5540,7 +5538,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
my_wildcmp_utf8,
my_strcasecmp_utf8,
my_instr_mb,
my_hash_sort_utf8,
my_hash_sort_utf8mb3,
my_propagate_complex
};

Expand All @@ -5556,7 +5554,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler =
my_wildcmp_utf8,
my_strcasecmp_utf8,
my_instr_mb,
my_hash_sort_utf8,
my_hash_sort_utf8mb3,
my_propagate_complex
};

Expand Down Expand Up @@ -5588,7 +5586,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler =
my_wildcmp_utf8,
my_strcasecmp_utf8,
my_instr_mb,
my_hash_sort_utf8_nopad,
my_hash_sort_utf8mb3_nopad,
my_propagate_complex
};

Expand Down Expand Up @@ -7224,7 +7222,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
my_wildcmp_utf8,
my_strcasecmp_utf8,
my_instr_mb,
my_hash_sort_utf8,
my_hash_sort_utf8mb3,
my_propagate_complex
};

Expand Down Expand Up @@ -7625,13 +7623,11 @@ static void
my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *nr1, ulong *nr2)
{
const uchar *e= s + slen;
/*
Remove end space. We do this to be able to compare
'A ' and 'A' as identical
*/
while (e > s && e[-1] == ' ')
e--;
const uchar *e= skip_trailing_space(s, slen);
my_hash_sort_utf8mb4_nopad(cs, s, e - s, nr1, nr2);
}

Expand Down

0 comments on commit 9b9a354

Please sign in to comment.