Skip to content

Commit 78b80cb

Browse files
author
Alexander Barkov
committed
Adding MY_CHARSET_HANDLER::native_to_mb().
This is a pre-requisite patch for: - MDEV-8433 Make field<'broken-string' use indexes - MDEV-8625 Bad result set with ignorable characters when using a prefix key - MDEV-8626 Bad result set with contractions when using a prefix key
1 parent bfb6ea0 commit 78b80cb

17 files changed

+102
-23
lines changed

include/m_ctype.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,20 @@ struct my_charset_handler_st
511511
char *dst, size_t dst_length,
512512
const char *src, size_t src_length,
513513
size_t nchars, MY_STRCOPY_STATUS *status);
514+
/**
515+
Write a character to the target string, using its native code.
516+
For Unicode character sets (utf8, ucs2, utf16, utf16le, utf32, filename)
517+
native codes are equivalent to Unicode code points.
518+
For 8bit character sets the native code is just the byte value.
519+
For Asian character sets:
520+
- MB1 native code is just the byte value (e.g. on the ASCII range)
521+
- MB2 native code is ((b0 << 8) + b1).
522+
- MB3 native code is ((b0 << 16) + (b1 << 8) + b2).
523+
Note, CHARSET_INFO::min_sort_char and CHARSET_INFO::max_sort_char
524+
are defined in native notation and should be written using
525+
cs->cset->native_to_mb() rather than cs->cset->wc_mb().
526+
*/
527+
my_charset_conv_wc_mb native_to_mb;
514528
};
515529

516530
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
@@ -664,6 +678,7 @@ extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);
664678

665679
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,const uchar *e);
666680
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
681+
int my_wc_mb_bin(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
667682

668683
int my_mb_ctype_8bit(CHARSET_INFO *,int *, const uchar *,const uchar *);
669684
int my_mb_ctype_mb(CHARSET_INFO *,int *, const uchar *,const uchar *);

strings/ctype-big5.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6847,6 +6847,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
68476847
my_charlen_big5,
68486848
my_well_formed_char_length_big5,
68496849
my_copy_fix_mb,
6850+
my_native_to_mb_big5,
68506851
};
68516852

68526853
struct charset_info_st my_charset_big5_chinese_ci=

strings/ctype-bin.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,8 @@ static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
256256
}
257257

258258

259-
static int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
260-
my_wc_t wc,
261-
uchar *s,
262-
uchar *e __attribute__((unused)))
259+
int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
260+
my_wc_t wc, uchar *s, uchar *e)
263261
{
264262
if (s >= e)
265263
return MY_CS_TOOSMALL;
@@ -552,6 +550,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
552550
my_charlen_8bit,
553551
my_well_formed_char_length_8bit,
554552
my_copy_8bit,
553+
my_wc_mb_bin,
555554
};
556555

557556

strings/ctype-cp932.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34722,6 +34722,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
3472234722
my_charlen_cp932,
3472334723
my_well_formed_char_length_cp932,
3472434724
my_copy_fix_mb,
34725+
my_native_to_mb_cp932,
3472534726
};
3472634727

3472734728

strings/ctype-euc_kr.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10016,6 +10016,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
1001610016
my_charlen_euckr,
1001710017
my_well_formed_char_length_euckr,
1001810018
my_copy_fix_mb,
10019+
my_native_to_mb_euckr,
1001910020
};
1002010021

1002110022

strings/ctype-eucjpms.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67549,6 +67549,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
6754967549
my_charlen_eucjpms,
6755067550
my_well_formed_char_length_eucjpms,
6755167551
my_copy_fix_mb,
67552+
my_native_to_mb_eucjpms,
6755267553
};
6755367554

6755467555

strings/ctype-gb2312.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6420,6 +6420,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
64206420
my_charlen_gb2312,
64216421
my_well_formed_char_length_gb2312,
64226422
my_copy_fix_mb,
6423+
my_native_to_mb_gb2312,
64236424
};
64246425

64256426

strings/ctype-gbk.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10732,6 +10732,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
1073210732
my_charlen_gbk,
1073310733
my_well_formed_char_length_gbk,
1073410734
my_copy_fix_mb,
10735+
my_native_to_mb_gbk,
1073510736
};
1073610737

1073710738

strings/ctype-latin1.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
425425
my_charlen_8bit,
426426
my_well_formed_char_length_8bit,
427427
my_copy_8bit,
428+
my_wc_mb_bin, /* native_to_mb */
428429
};
429430

430431

strings/ctype-mb.c

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -811,25 +811,8 @@ my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
811811
static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
812812
{
813813
char buf[10];
814-
char buflen;
815-
816-
if (!(cs->state & MY_CS_UNICODE))
817-
{
818-
if (cs->max_sort_char <= 255)
819-
{
820-
bfill(str, end - str, cs->max_sort_char);
821-
return;
822-
}
823-
buf[0]= cs->max_sort_char >> 8;
824-
buf[1]= cs->max_sort_char & 0xFF;
825-
buflen= 2;
826-
}
827-
else
828-
{
829-
buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
830-
(uchar*) buf + sizeof(buf));
831-
}
832-
814+
char buflen= cs->cset->native_to_mb(cs, cs->max_sort_char, (uchar*) buf,
815+
(uchar*) buf + sizeof(buf));
833816
DBUG_ASSERT(buflen > 0);
834817
do
835818
{

0 commit comments

Comments
 (0)