Skip to content

Commit

Permalink
Adding MY_CHARSET_HANDLER::native_to_mb().
Browse files Browse the repository at this point in the history
This is a pre-requisite patch for:
- MDEV-8433 Make field<'broken-string' use indexes
- MDEV-8625 Bad result set with ignorable characters when using a prefix key
- MDEV-8626 Bad result set with contractions when using a prefix key
  • Loading branch information
Alexander Barkov committed Aug 14, 2015
1 parent bfb6ea0 commit 78b80cb
Show file tree
Hide file tree
Showing 17 changed files with 102 additions and 23 deletions.
15 changes: 15 additions & 0 deletions include/m_ctype.h
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,20 @@ struct my_charset_handler_st
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *status);
/**
Write a character to the target string, using its native code.
For Unicode character sets (utf8, ucs2, utf16, utf16le, utf32, filename)
native codes are equivalent to Unicode code points.
For 8bit character sets the native code is just the byte value.
For Asian character sets:
- MB1 native code is just the byte value (e.g. on the ASCII range)
- MB2 native code is ((b0 << 8) + b1).
- MB3 native code is ((b0 << 16) + (b1 << 8) + b2).
Note, CHARSET_INFO::min_sort_char and CHARSET_INFO::max_sort_char
are defined in native notation and should be written using
cs->cset->native_to_mb() rather than cs->cset->wc_mb().
*/
my_charset_conv_wc_mb native_to_mb;
};

extern MY_CHARSET_HANDLER my_charset_8bit_handler;
Expand Down Expand Up @@ -664,6 +678,7 @@ extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);

int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,const uchar *e);
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
int my_wc_mb_bin(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);

int my_mb_ctype_8bit(CHARSET_INFO *,int *, const uchar *,const uchar *);
int my_mb_ctype_mb(CHARSET_INFO *,int *, const uchar *,const uchar *);
Expand Down
1 change: 1 addition & 0 deletions strings/ctype-big5.c
Original file line number Diff line number Diff line change
Expand Up @@ -6847,6 +6847,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
my_charlen_big5,
my_well_formed_char_length_big5,
my_copy_fix_mb,
my_native_to_mb_big5,
};

struct charset_info_st my_charset_big5_chinese_ci=
Expand Down
7 changes: 3 additions & 4 deletions strings/ctype-bin.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,8 @@ static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
}


static int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t wc,
uchar *s,
uchar *e __attribute__((unused)))
int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t wc, uchar *s, uchar *e)
{
if (s >= e)
return MY_CS_TOOSMALL;
Expand Down Expand Up @@ -552,6 +550,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_8bit,
my_well_formed_char_length_8bit,
my_copy_8bit,
my_wc_mb_bin,
};


Expand Down
1 change: 1 addition & 0 deletions strings/ctype-cp932.c
Original file line number Diff line number Diff line change
Expand Up @@ -34722,6 +34722,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_cp932,
my_well_formed_char_length_cp932,
my_copy_fix_mb,
my_native_to_mb_cp932,
};


Expand Down
1 change: 1 addition & 0 deletions strings/ctype-euc_kr.c
Original file line number Diff line number Diff line change
Expand Up @@ -10016,6 +10016,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_euckr,
my_well_formed_char_length_euckr,
my_copy_fix_mb,
my_native_to_mb_euckr,
};


Expand Down
1 change: 1 addition & 0 deletions strings/ctype-eucjpms.c
Original file line number Diff line number Diff line change
Expand Up @@ -67549,6 +67549,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_eucjpms,
my_well_formed_char_length_eucjpms,
my_copy_fix_mb,
my_native_to_mb_eucjpms,
};


Expand Down
1 change: 1 addition & 0 deletions strings/ctype-gb2312.c
Original file line number Diff line number Diff line change
Expand Up @@ -6420,6 +6420,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_gb2312,
my_well_formed_char_length_gb2312,
my_copy_fix_mb,
my_native_to_mb_gb2312,
};


Expand Down
1 change: 1 addition & 0 deletions strings/ctype-gbk.c
Original file line number Diff line number Diff line change
Expand Up @@ -10732,6 +10732,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_gbk,
my_well_formed_char_length_gbk,
my_copy_fix_mb,
my_native_to_mb_gbk,
};


Expand Down
1 change: 1 addition & 0 deletions strings/ctype-latin1.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_8bit,
my_well_formed_char_length_8bit,
my_copy_8bit,
my_wc_mb_bin, /* native_to_mb */
};


Expand Down
21 changes: 2 additions & 19 deletions strings/ctype-mb.c
Original file line number Diff line number Diff line change
Expand Up @@ -811,25 +811,8 @@ my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
{
char buf[10];
char buflen;

if (!(cs->state & MY_CS_UNICODE))
{
if (cs->max_sort_char <= 255)
{
bfill(str, end - str, cs->max_sort_char);
return;
}
buf[0]= cs->max_sort_char >> 8;
buf[1]= cs->max_sort_char & 0xFF;
buflen= 2;
}
else
{
buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
(uchar*) buf + sizeof(buf));
}

char buflen= cs->cset->native_to_mb(cs, cs->max_sort_char, (uchar*) buf,
(uchar*) buf + sizeof(buf));
DBUG_ASSERT(buflen > 0);
do
{
Expand Down
64 changes: 64 additions & 0 deletions strings/ctype-mb.ic
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#define DEFINE_WELL_FORMED_LEN
#define DEFINE_WELL_FORMED_CHAR_LENGTH
#define DEFINE_CHARLEN
#define DEFINE_NATIVE_TO_MB_VARLEN
#endif


Expand Down Expand Up @@ -257,4 +258,67 @@ MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused
}
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */


#ifdef DEFINE_NATIVE_TO_MB_VARLEN
/*
  Write a native 2-byte character code as two bytes, high byte first.

  If only one byte of space is available, the leading byte is still
  written and MY_CS_TOOSMALL2 is returned.

  @param wc  Native character code: bits 8..15 become the first byte,
             bits 0..7 the second byte.
  @param s   Start of the destination buffer.
  @param e   End of the destination buffer (one past the last writable byte).
  @return    2 on success, MY_CS_TOOSMALL2 if fewer than two bytes fit.
*/
static inline int
my_native_to_mb_fixed2(my_wc_t wc, uchar *s, uchar *e)
{
  /* The caller must ensure there is space for at least one byte */
  DBUG_ASSERT(s < e);
  s[0]= (uchar) (wc >> 8);
  /*
    Test the remaining space instead of forming "s + 2": when only one
    byte is available that pointer would lie more than one element past
    the end of the buffer, which is undefined behaviour.
  */
  if (e - s < 2)
    return MY_CS_TOOSMALL2;
  s[1]= (uchar) (wc & 0xFF);
  return 2;
}


/*
  Write a native 3-byte character code as three bytes, high byte first.

  If the full character does not fit, the leading bytes that do fit are
  still written and a "too small" code is returned.

  @param wc  Native character code: bits 16..23 become the first byte,
             bits 8..15 the second and bits 0..7 the third.
  @param s   Start of the destination buffer.
  @param e   End of the destination buffer (one past the last writable byte).
  @return    3 on success,
             MY_CS_TOOSMALL2 if only one byte fits,
             MY_CS_TOOSMALL3 if only two bytes fit.
*/
static inline int
my_native_to_mb_fixed3(my_wc_t wc, uchar *s, uchar *e)
{
  /* The caller must ensure there is space for at least one byte */
  DBUG_ASSERT(s < e);
  s[0]= (uchar) (wc >> 16);
  /*
    Test the remaining space instead of forming "s + 2" / "s + 3":
    those pointers may lie more than one element past the end of the
    buffer, which is undefined behaviour.
  */
  if (e - s < 2)
    return MY_CS_TOOSMALL2;
  s[1]= (uchar) ((wc >> 8) & 0xFF);
  if (e - s < 3)
    return MY_CS_TOOSMALL3;
  s[2]= (uchar) (wc & 0xFF);
  return 3;
}


/*
  Write a native 1-byte, 2-byte or 3-byte character code to a buffer.

  Codes up to 0xFF are written as a single byte. When IS_MB3_HEAD is
  defined, codes above 0xFFFF are written as three bytes. Every other
  code is written as two bytes (without IS_MB3_HEAD this includes codes
  above 0xFFFF, whose upper bits are then dropped).

  @param cs  Character set; not used.
  @param wc  Native character code to write.
  @param s   Start of the destination buffer.
  @param e   End of the destination buffer (one past the last writable byte).
  @return    Number of bytes written (1, 2 or 3) on success,
             MY_CS_TOOSMALL if the buffer is empty, or the "too small"
             code returned by the 2-byte/3-byte writer on a short buffer.
*/

static int
MY_FUNCTION_NAME(native_to_mb)(CHARSET_INFO *cs __attribute__((unused)),
                               my_wc_t wc, uchar *s, uchar *e)
{
  if (s >= e)
    return MY_CS_TOOSMALL;
  /*
    Compare wc at its own width. Narrowing it to int first could make a
    large code look negative (or alias a small value), so that it would
    be emitted as a single byte.
    NOTE(review): assumes my_wc_t is an unsigned integer type - confirm
    against its typedef.
  */
  if (wc <= 0xFF)
  {
    s[0]= (uchar) wc;
    return 1;
  }
#ifdef IS_MB3_HEAD
  if (wc > 0xFFFF)
    return my_native_to_mb_fixed3(wc, s, e);
#endif
  return my_native_to_mb_fixed2(wc, s, e);
}
#endif /* DEFINE_NATIVE_TO_MB_VARLEN */


#undef MY_FUNCTION_NAME
1 change: 1 addition & 0 deletions strings/ctype-simple.c
Original file line number Diff line number Diff line change
Expand Up @@ -1950,6 +1950,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
my_charlen_8bit,
my_well_formed_char_length_8bit,
my_copy_8bit,
my_wc_mb_bin, /* native_to_mb */
};

MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
Expand Down
1 change: 1 addition & 0 deletions strings/ctype-sjis.c
Original file line number Diff line number Diff line change
Expand Up @@ -34101,6 +34101,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_sjis,
my_well_formed_char_length_sjis,
my_copy_fix_mb,
my_native_to_mb_sjis,
};


Expand Down
1 change: 1 addition & 0 deletions strings/ctype-tis620.c
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_8bit,
my_well_formed_char_length_8bit,
my_copy_8bit,
my_wc_mb_bin, /* native_to_mb */
};


Expand Down
4 changes: 4 additions & 0 deletions strings/ctype-ucs2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1590,6 +1590,7 @@ MY_CHARSET_HANDLER my_charset_utf16_handler=
my_charlen_utf16,
my_well_formed_char_length_utf16,
my_copy_fix_mb2_or_mb4,
my_uni_utf16,
};


Expand Down Expand Up @@ -1812,6 +1813,7 @@ static MY_CHARSET_HANDLER my_charset_utf16le_handler=
my_charlen_utf16,
my_well_formed_char_length_utf16,
my_copy_fix_mb2_or_mb4,
my_uni_utf16le,
};


Expand Down Expand Up @@ -2556,6 +2558,7 @@ MY_CHARSET_HANDLER my_charset_utf32_handler=
my_charlen_utf32,
my_well_formed_char_length_utf32,
my_copy_fix_mb2_or_mb4,
my_uni_utf32,
};


Expand Down Expand Up @@ -3042,6 +3045,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
my_charlen_ucs2,
my_well_formed_char_length_ucs2,
my_copy_fix_mb2_or_mb4,
my_uni_ucs2,
};


Expand Down
1 change: 1 addition & 0 deletions strings/ctype-ujis.c
Original file line number Diff line number Diff line change
Expand Up @@ -67293,6 +67293,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_charlen_ujis,
my_well_formed_char_length_ujis,
my_copy_fix_mb,
my_native_to_mb_ujis,
};


Expand Down
3 changes: 3 additions & 0 deletions strings/ctype-utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -5526,6 +5526,7 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
my_charlen_utf8,
my_well_formed_char_length_utf8,
my_copy_fix_mb,
my_uni_utf8,
};


Expand Down Expand Up @@ -7109,6 +7110,7 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
my_charlen_filename,
my_well_formed_char_length_filename,
my_copy_fix_mb,
my_wc_mb_filename,
};


Expand Down Expand Up @@ -7879,6 +7881,7 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
my_charlen_utf8mb4,
my_well_formed_char_length_utf8mb4,
my_copy_fix_mb,
my_wc_mb_utf8mb4,
};


Expand Down

0 comments on commit 78b80cb

Please sign in to comment.