@@ -201,6 +201,10 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
201
201
#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
202
202
#define MY_CS_LOWER_SORT 32768 /* If use lower case as weight */
203
203
#define MY_CS_STRNXFRM_BAD_NWEIGHTS 0x10000 /* strnxfrm ignores "nweights" */
204
+ #define MY_CS_NOPAD 0x20000 /* Set if trailing spaces are not ignored */
205
+ #define MY_CS_NON1TO1 0x40000 /* Has a complex mapping from characters
206
+ to weights, e.g. contractions, expansions,
207
+ ignorable characters */
204
208
#define MY_CHARSET_UNDEFINED 0
205
209
206
210
/* Character repertoire flags */
@@ -511,6 +515,20 @@ struct my_charset_handler_st
511
515
char * dst , size_t dst_length ,
512
516
const char * src , size_t src_length ,
513
517
size_t nchars , MY_STRCOPY_STATUS * status );
518
+ /**
519
+ Write a character to the target string, using its native code.
520
+ For Unicode character sets (utf8, ucs2, utf16, utf16le, utf32, filename)
521
+ native codes are equivalent to Unicode code points.
522
+ For 8bit character sets the native code is just the byte value.
523
+ For Asian character sets:
524
+ - MB1 native code is just the byte value (e.g. on the ASCII range)
525
+ - MB2 native code is ((b0 << 8) + b1).
526
+ - MB3 native code is ((b0 << 16) + (b1 << 8) + b2)
527
+ Note, CHARSET_INFO::min_sort_char and CHARSET_INFO::max_sort_char
528
+ are defined in native notation and should be written using
529
+ cs->cset->native_to_mb() rather than cs->cset->wc_mb().
530
+ */
531
+ my_charset_conv_wc_mb native_to_mb ;
514
532
};
515
533
516
534
extern MY_CHARSET_HANDLER my_charset_8bit_handler ;
@@ -664,6 +682,7 @@ extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);
664
682
665
683
int my_mb_wc_8bit (CHARSET_INFO * cs ,my_wc_t * wc , const uchar * s ,const uchar * e );
666
684
int my_wc_mb_8bit (CHARSET_INFO * cs ,my_wc_t wc , uchar * s , uchar * e );
685
+ int my_wc_mb_bin (CHARSET_INFO * cs ,my_wc_t wc , uchar * s , uchar * e );
667
686
668
687
int my_mb_ctype_8bit (CHARSET_INFO * ,int * , const uchar * ,const uchar * );
669
688
int my_mb_ctype_mb (CHARSET_INFO * ,int * , const uchar * ,const uchar * );
0 commit comments