diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index b9c5f55051f6c..c5fb0fca1689a 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -1056,18 +1056,19 @@ Lex_input_stream::unescape(CHARSET_INFO *cs, char *to, Fix sometimes to do only one scan of the string */ -bool Lex_input_stream::get_text(LEX_STRING *dst, uint sep, +bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep, int pre_skip, int post_skip) { reg1 uchar c; uint found_escape=0; CHARSET_INFO *cs= m_thd->charset(); - tok_bitmap= 0; + dst->set_8bit(false); while (! eof()) { c= yyGet(); - tok_bitmap|= c; + if (c & 0x80) + dst->set_8bit(true); #ifdef USE_MB { int l; @@ -1433,18 +1434,17 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) } /* Found N'string' */ lip->yySkip(); // Skip ' - if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 2, 1)) + if (lip->get_text(&yylval->lex_string_with_metadata, + (sep= lip->yyGetLast()), 2, 1)) { state= MY_LEX_CHAR; // Read char by char break; } lip->body_utf8_append(lip->m_cpp_text_start); - lip->body_utf8_append_escape(thd, &yylval->lex_str, + lip->body_utf8_append_escape(thd, &yylval->lex_string_with_metadata, national_charset_info, lip->m_cpp_text_end, sep); - - lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1; return(NCHAR_STRING); } case MY_LEX_IDENT_OR_HEX: @@ -1798,7 +1798,8 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) case MY_LEX_STRING: // Incomplete text string { uint sep; - if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 1, 1)) + if (lip->get_text(&yylval->lex_string_with_metadata, + (sep= lip->yyGetLast()), 1, 1)) { state= MY_LEX_CHAR; // Read char by char break; @@ -1806,11 +1807,9 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) CHARSET_INFO *strcs= lip->m_underscore_cs ? 
lip->m_underscore_cs : cs; lip->body_utf8_append(lip->m_cpp_text_start); - lip->body_utf8_append_escape(thd, &yylval->lex_str, strcs, - lip->m_cpp_text_end, sep); + lip->body_utf8_append_escape(thd, &yylval->lex_string_with_metadata, + strcs, lip->m_cpp_text_end, sep); lip->m_underscore_cs= NULL; - - lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1; return(TEXT_STRING); } case MY_LEX_COMMENT: // Comment diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 87e15036ec78d..cd2c103e439e9 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -33,6 +33,38 @@ /* YACC and LEX Definitions */ + +/** + A string with metadata: a LEX_STRING plus a flag telling whether it has 8bit characters. + We'll add more flags here eventually, to know if the string has, e.g.: + - multi-byte characters + - bad byte sequences + - backslash escapes: 'a\nb' + - separator escapes: 'a''b' + and reuse the original query fragments instead of making the string + copy too early, in Lex_input_stream::get_text(). + This will allow us to avoid unnecessary copying, as well as + to create more optimal Item types in sql_yacc.yy +*/ +struct Lex_string_with_metadata_st: public LEX_STRING +{ + bool m_is_8bit; // True if the string has 8bit characters (bytes with bit 0x80 set) +public: + void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; } + // Get string repertoire by the 8-bit flag and the character set: ASCII only if the string has no 8bit characters and cs is ASCII-based, UNICODE30 otherwise + uint repertoire(CHARSET_INFO *cs) const + { + return !m_is_8bit && my_charset_is_ascii_based(cs) ? + MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + } + // Get string repertoire by the 8-bit flag, for ASCII-based character sets (the character set itself is not checked here) + uint repertoire() const + { + return !m_is_8bit ? 
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + } +}; + + enum sub_select_type { UNSPECIFIED_TYPE, @@ -2246,7 +2278,8 @@ class Lex_input_stream /** LALR(2) resolution, value of the look ahead token.*/ LEX_YYSTYPE lookahead_yylval; - bool get_text(LEX_STRING *to, uint sep, int pre_skip, int post_skip); + bool get_text(Lex_string_with_metadata_st *to, + uint sep, int pre_skip, int post_skip); void add_digest_token(uint token, LEX_YYSTYPE yylval); @@ -2325,9 +2358,6 @@ class Lex_input_stream */ const char *found_semicolon; - /** Token character bitmaps, to detect 7bit strings. */ - uchar tok_bitmap; - /** SQL_MODE = IGNORE_SPACE. */ bool ignore_space; @@ -2565,8 +2595,6 @@ struct LEX: public Query_tables_list DYNAMIC_ARRAY plugins; plugin_ref plugins_static_buffer[INITIAL_LEX_PLUGIN_LIST_SIZE]; - bool text_string_is_7bit; - /** SELECT of CREATE VIEW statement */ LEX_STRING create_view_select; diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 266ae5b441a1e..4ae9e91c68272 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -906,6 +906,7 @@ Virtual_column_info *add_virtual_expression(THD *thd, Item *expr) /* structs */ LEX_STRING lex_str; LEX_SYMBOL symbol; + Lex_string_with_metadata_st lex_string_with_metadata; struct sys_var_with_base variable; struct { int vars, conds, hndlrs, curs; } spblock; Lex_length_and_dec_st Lex_length_and_dec; @@ -1710,14 +1711,18 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %left INTERVAL_SYM %type - IDENT IDENT_QUOTED TEXT_STRING DECIMAL_NUM FLOAT_NUM NUM LONG_NUM + IDENT IDENT_QUOTED DECIMAL_NUM FLOAT_NUM NUM LONG_NUM HEX_NUM HEX_STRING LEX_HOSTNAME ULONGLONG_NUM field_ident select_alias ident ident_or_text IDENT_sys TEXT_STRING_sys TEXT_STRING_literal - NCHAR_STRING opt_component key_cache_name + opt_component key_cache_name sp_opt_label BIN_NUM label_ident TEXT_STRING_filesystem ident_or_empty opt_constraint constraint opt_ident +%type + TEXT_STRING + NCHAR_STRING + %type opt_table_alias @@ -13696,9 +13701,7 @@ 
text_literal: LEX_STRING tmp; CHARSET_INFO *cs_con= thd->variables.collation_connection; CHARSET_INFO *cs_cli= thd->variables.character_set_client; - uint repertoire= thd->lex->text_string_is_7bit && - my_charset_is_ascii_based(cs_cli) ? - MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + uint repertoire= $1.repertoire(cs_cli); if (thd->charset_is_collation_connection || (repertoire == MY_REPERTOIRE_ASCII && my_charset_is_ascii_based(cs_con))) @@ -13717,13 +13720,11 @@ text_literal: } | NCHAR_STRING { - uint repertoire= Lex->text_string_is_7bit ? - MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info)); $$= new (thd->mem_root) Item_string(thd, $1.str, $1.length, national_charset_info, DERIVATION_COERCIBLE, - repertoire); + $1.repertoire()); if ($$ == NULL) MYSQL_YYABORT; }