Skip to content

Commit

Permalink
Step#2 MDEV-27896 Wrong result upon `COLLATE latin1_bin CHARACTER SET latin1` on the table or the database level
Browse files Browse the repository at this point in the history

- Renaming Lex_charset_collation_st to
  Lex_exact_charset_extended_collation_attrs_st, and its derived class
  Lex_charset_collation to Lex_exact_charset_extended_collation_attrs

- Renaming Lex_explicit_charset_opt_collate to
  Lex_exact_charset_opt_extended_collate

- Renaming their methods charset_collation() and charset_and_collation()
  to charset_info(), so the name clearly tells that it returns CHARSET_INFO.
  Soon we'll have new classes (e.g. Lex_exact_collation) and
  methods returning Lex_exact_collation, so the old names would be
  confusing about the return type.
  • Loading branch information
abarkov committed May 23, 2022
1 parent 64a5fab commit e7f635e
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 69 deletions.
11 changes: 6 additions & 5 deletions sql/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -5504,20 +5504,21 @@ class Column_definition: public Sql_alloc,
void set_charset_collation_attrs(const
Lex_column_charset_collation_attrs_st &lc)
{
charset= lc.charset_collation();
charset= lc.charset_info();
if (lc.is_contextually_typed_collation())
flags|= CONTEXT_COLLATION_FLAG;
else
flags&= ~CONTEXT_COLLATION_FLAG;
}
Lex_column_charset_collation_attrs charset_collation_attrs() const
{
return Lex_charset_collation(
if (!charset)
return Lex_column_charset_collation_attrs();
return Lex_column_charset_collation_attrs(
charset,
!charset ? Lex_charset_collation_st::TYPE_EMPTY :
flags & CONTEXT_COLLATION_FLAG ?
Lex_charset_collation_st::TYPE_COLLATE_CONTEXTUALLY_TYPED :
Lex_charset_collation_st::TYPE_CHARACTER_SET);
Lex_column_charset_collation_attrs_st::TYPE_COLLATE_CONTEXTUALLY_TYPED :
Lex_column_charset_collation_attrs_st::TYPE_CHARACTER_SET);
}
};

Expand Down
72 changes: 38 additions & 34 deletions sql/lex_charset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@

/** find a collation with binary comparison rules
*/
CHARSET_INFO *Lex_charset_collation_st::find_bin_collation(CHARSET_INFO *cs)
CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
find_bin_collation(CHARSET_INFO *cs)
{
/*
We don't need to handle old_mode=UTF8_IS_UTF8MB3 here,
Expand Down Expand Up @@ -52,7 +53,8 @@ CHARSET_INFO *Lex_charset_collation_st::find_bin_collation(CHARSET_INFO *cs)
}


CHARSET_INFO *Lex_charset_collation_st::find_default_collation(CHARSET_INFO *cs)
CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
find_default_collation(CHARSET_INFO *cs)
{
// See comments in find_bin_collation()
DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4));
Expand All @@ -79,8 +81,8 @@ CHARSET_INFO *Lex_charset_collation_st::find_default_collation(CHARSET_INFO *cs)
}


bool Lex_charset_collation_st::set_charset_collate_exact(CHARSET_INFO *cs,
CHARSET_INFO *cl)
bool Lex_exact_charset_extended_collation_attrs_st::
set_charset_collate_exact(CHARSET_INFO *cs, CHARSET_INFO *cl)
{
DBUG_ASSERT(cs != nullptr && cl != nullptr);
if (!my_charset_same(cl, cs))
Expand All @@ -105,8 +107,8 @@ bool Lex_charset_collation_st::set_charset_collate_exact(CHARSET_INFO *cs,
"this" is the COLLATE clause (e.g. of a column)
"def" is the upper level CHARACTER SET clause (e.g. of a table)
*/
CHARSET_INFO *
Lex_charset_collation_st::resolved_to_character_set(CHARSET_INFO *def) const
CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
resolved_to_character_set(CHARSET_INFO *def) const
{
DBUG_ASSERT(def);

Expand Down Expand Up @@ -151,9 +153,9 @@ Lex_charset_collation_st::resolved_to_character_set(CHARSET_INFO *def) const
"this" corresponds to `CHARACTER SET xxx [BINARY]`
"cl" corresponds to the COLLATE clause
*/
bool
Lex_charset_collation_st::
merge_charset_clause_and_collate_clause(const Lex_charset_collation_st &cl)
bool Lex_exact_charset_extended_collation_attrs_st::
merge_charset_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl)
{
if (cl.is_empty()) // No COLLATE clause
return false;
Expand All @@ -170,10 +172,10 @@ Lex_charset_collation_st::
case TYPE_CHARACTER_SET:
case TYPE_COLLATE_EXACT:
{
Lex_explicit_charset_opt_collate ecs(m_ci, m_type == TYPE_COLLATE_EXACT);
Lex_exact_charset_opt_extended_collate ecs(m_ci, m_type == TYPE_COLLATE_EXACT);
if (ecs.merge_collate_or_error(cl))
return true;
set_collate_exact(ecs.charset_and_collation());
set_collate_exact(ecs.charset_info());
return false;
}
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
Expand All @@ -200,19 +202,19 @@ Lex_charset_collation_st::
CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin
*/
if (is_contextually_typed_collate_default() &&
!(cl.charset_collation()->state & MY_CS_PRIMARY))
!(cl.charset_info()->state & MY_CS_PRIMARY))
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"COLLATE ", "DEFAULT", "COLLATE ",
cl.charset_collation()->coll_name.str);
cl.charset_info()->coll_name.str);
return true;
}

if (is_contextually_typed_binary_style() &&
!(cl.charset_collation()->state & MY_CS_BINSORT))
!(cl.charset_info()->state & MY_CS_BINSORT))
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"", "BINARY", "COLLATE ", cl.charset_collation()->coll_name.str);
"", "BINARY", "COLLATE ", cl.charset_info()->coll_name.str);
return true;
}
*this= cl;
Expand All @@ -224,19 +226,20 @@ Lex_charset_collation_st::
}


bool
Lex_explicit_charset_opt_collate::
merge_collate_or_error(const Lex_charset_collation_st &cl)
bool Lex_exact_charset_opt_extended_collate::
merge_collate_or_error(
const Lex_exact_charset_extended_collation_attrs_st &cl)
{
DBUG_ASSERT(cl.type() != Lex_charset_collation_st::TYPE_CHARACTER_SET);
DBUG_ASSERT(cl.type() !=
Lex_exact_charset_extended_collation_attrs_st::TYPE_CHARACTER_SET);

switch (cl.type()) {
case Lex_charset_collation_st::TYPE_EMPTY:
case Lex_exact_charset_extended_collation_attrs_st::TYPE_EMPTY:
return false;
case Lex_charset_collation_st::TYPE_CHARACTER_SET:
case Lex_exact_charset_extended_collation_attrs_st::TYPE_CHARACTER_SET:
DBUG_ASSERT(0);
return false;
case Lex_charset_collation_st::TYPE_COLLATE_EXACT:
case Lex_exact_charset_extended_collation_attrs_st::TYPE_COLLATE_EXACT:
/*
EXPLICIT + EXPLICIT
CHAR(10) CHARACTER SET latin1 .. COLLATE latin1_bin
Expand All @@ -245,31 +248,31 @@ Lex_explicit_charset_opt_collate::
CHAR(10) COLLATE latin1_bin .. COLLATE latin1_bin
CHAR(10) CHARACTER SET latin1 BINARY .. COLLATE latin1_bin
*/
if (m_with_collate && m_ci != cl.charset_collation())
if (m_with_collate && m_ci != cl.charset_info())
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"COLLATE ", m_ci->coll_name.str,
"COLLATE ", cl.charset_collation()->coll_name.str);
"COLLATE ", cl.charset_info()->coll_name.str);
return true;
}
if (!my_charset_same(m_ci, cl.charset_collation()))
if (!my_charset_same(m_ci, cl.charset_info()))
{
my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
cl.charset_collation()->coll_name.str, m_ci->cs_name.str);
cl.charset_info()->coll_name.str, m_ci->cs_name.str);
return true;
}
m_ci= cl.charset_collation();
m_ci= cl.charset_info();
m_with_collate= true;
return false;

case Lex_charset_collation_st::TYPE_COLLATE_CONTEXTUALLY_TYPED:
case Lex_exact_charset_extended_collation_attrs_st::TYPE_COLLATE_CONTEXTUALLY_TYPED:
if (cl.is_contextually_typed_collate_default())
{
/*
SET NAMES latin1 COLLATE DEFAULT;
ALTER TABLE t1 CONVERT TO CHARACTER SET latin1 COLLATE DEFAULT;
*/
CHARSET_INFO *tmp= Lex_charset_collation_st::find_default_collation(m_ci);
CHARSET_INFO *tmp= Lex_exact_charset_extended_collation_attrs_st::find_default_collation(m_ci);
if (!tmp)
return true;
m_ci= tmp;
Expand Down Expand Up @@ -298,8 +301,9 @@ Lex_explicit_charset_opt_collate::
COLLATE clauses (not belonging to a CHARACTER SET clause).
*/
bool
Lex_charset_collation_st::
merge_collate_clause_and_collate_clause(const Lex_charset_collation_st &cl)
Lex_exact_charset_extended_collation_attrs_st::
merge_collate_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl)
{
/*
"BINARY" and "COLLATE DEFAULT" are not possible
Expand Down Expand Up @@ -329,11 +333,11 @@ Lex_charset_collation_st::
Note, we should perhaps eventually disallow double COLLATE clauses.
But for now let's just disallow only conflicting ones.
*/
if (charset_collation() != cl.charset_collation())
if (charset_info() != cl.charset_info())
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"COLLATE ", charset_collation()->coll_name.str,
"COLLATE ", cl.charset_collation()->coll_name.str);
"COLLATE ", charset_info()->coll_name.str,
"COLLATE ", cl.charset_info()->coll_name.str);
return true;
}
return false;
Expand Down
61 changes: 38 additions & 23 deletions sql/lex_charset.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
Resolution happens in Type_handler::Column_definition_prepare_stage1().
*/
struct Lex_charset_collation_st
struct Lex_exact_charset_extended_collation_attrs_st
{
public:
enum Type
Expand All @@ -60,7 +60,7 @@ struct Lex_charset_collation_st
#define LEX_CHARSET_COLLATION_TYPE_BITS 2
static_assert(((1<<LEX_CHARSET_COLLATION_TYPE_BITS)-1) >=
TYPE_COLLATE_CONTEXTUALLY_TYPED,
"Lex_charset_collation_st::Type bits check");
"Lex_exact_charset_extended_collation_attrs_st::Type bits");

protected:
CHARSET_INFO *m_ci;
Expand All @@ -74,6 +74,12 @@ struct Lex_charset_collation_st
m_ci= NULL;
m_type= TYPE_EMPTY;
}
void init(CHARSET_INFO *cs, Type type)
{
DBUG_ASSERT(cs || type == TYPE_EMPTY);
m_ci= cs;
m_type= type;
}
bool is_empty() const
{
return m_type == TYPE_EMPTY;
Expand Down Expand Up @@ -125,7 +131,7 @@ struct Lex_charset_collation_st
m_ci= cl;
m_type= TYPE_COLLATE_EXACT;
}
CHARSET_INFO *charset_collation() const
CHARSET_INFO *charset_info() const
{
return m_ci;
}
Expand All @@ -138,66 +144,75 @@ struct Lex_charset_collation_st
return m_type == TYPE_COLLATE_CONTEXTUALLY_TYPED;
}
CHARSET_INFO *resolved_to_character_set(CHARSET_INFO *cs) const;
bool merge_charset_clause_and_collate_clause(const Lex_charset_collation_st &cl);
bool merge_collate_clause_and_collate_clause(const Lex_charset_collation_st &cl);
bool merge_charset_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl);
bool merge_collate_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl);
};


/*
CHARACTER SET cs [COLLATE cl]
CHARACTER SET cs_exact [COLLATE cl_exact_or_context]
*/
class Lex_explicit_charset_opt_collate
class Lex_exact_charset_opt_extended_collate
{
CHARSET_INFO *m_ci;
bool m_with_collate;
public:
Lex_explicit_charset_opt_collate(CHARSET_INFO *ci, bool with_collate)
Lex_exact_charset_opt_extended_collate(CHARSET_INFO *ci, bool with_collate)
:m_ci(ci), m_with_collate(with_collate)
{
DBUG_ASSERT(m_ci);
// Item_func_set_collation uses non-default collations in "ci"
//DBUG_ASSERT(m_ci->default_flag() || m_with_collate);
}
/*
Merge to another COLLATE clause. So the full syntax looks like:
Add another COLLATE clause (exact or context).
So the full syntax looks like:
CHARACTER SET cs [COLLATE cl] ... COLLATE cl2
*/
bool merge_collate_or_error(const Lex_charset_collation_st &cl);
bool merge_opt_collate_or_error(const Lex_charset_collation_st &cl)
bool merge_collate_or_error(
const Lex_exact_charset_extended_collation_attrs_st &cl);
bool merge_opt_collate_or_error(
const Lex_exact_charset_extended_collation_attrs_st &cl)
{
if (cl.is_empty())
return false;
return merge_collate_or_error(cl);
}
CHARSET_INFO *charset_and_collation() const { return m_ci; }
CHARSET_INFO *charset_info() const { return m_ci; }
bool with_collate() const { return m_with_collate; }
};


class Lex_charset_collation: public Lex_charset_collation_st
class Lex_exact_charset_extended_collation_attrs:
public Lex_exact_charset_extended_collation_attrs_st
{
public:
Lex_charset_collation()
Lex_exact_charset_extended_collation_attrs()
{
init();
}
Lex_charset_collation(CHARSET_INFO *collation, Type type)
Lex_exact_charset_extended_collation_attrs(CHARSET_INFO *collation, Type type)
{
DBUG_ASSERT(collation || type == TYPE_EMPTY);
m_ci= collation;
m_type= type;
init(collation, type);
}
static Lex_charset_collation national(bool bin_mod)
static Lex_exact_charset_extended_collation_attrs national(bool bin_mod)
{
return bin_mod ?
Lex_charset_collation(&my_charset_utf8mb3_bin, TYPE_COLLATE_EXACT) :
Lex_charset_collation(&my_charset_utf8mb3_general_ci, TYPE_CHARACTER_SET);
Lex_exact_charset_extended_collation_attrs(&my_charset_utf8mb3_bin,
TYPE_COLLATE_EXACT) :
Lex_exact_charset_extended_collation_attrs(&my_charset_utf8mb3_general_ci,
TYPE_CHARACTER_SET);
}
};


using Lex_column_charset_collation_attrs_st = Lex_charset_collation_st;
using Lex_column_charset_collation_attrs = Lex_charset_collation;
using Lex_column_charset_collation_attrs_st =
Lex_exact_charset_extended_collation_attrs_st;

using Lex_column_charset_collation_attrs =
Lex_exact_charset_extended_collation_attrs;


#endif // LEX_CHARSET_INCLUDED
11 changes: 6 additions & 5 deletions sql/sql_yacc.yy
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ void _CONCAT_UNDERSCORED(turn_parser_debug_on,yyparse)()
Lex_length_and_dec_st Lex_length_and_dec;
Lex_cast_type_st Lex_cast_type;
Lex_field_type_st Lex_field_type;
Lex_charset_collation_st Lex_charset_collation;
Lex_exact_charset_extended_collation_attrs_st
Lex_exact_charset_extended_collation_attrs;
Lex_dyncol_type_st Lex_dyncol_type;
Lex_for_loop_st for_loop;
Lex_for_loop_bounds_st for_loop_bounds;
Expand Down Expand Up @@ -1378,7 +1379,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
field_type_misc
json_table_field_type

%type <Lex_charset_collation>
%type <Lex_exact_charset_extended_collation_attrs>
binary
opt_binary
opt_binary_and_compression
Expand Down Expand Up @@ -5789,7 +5790,7 @@ field_type_or_serial:
}
field_def
{
Lex_charset_collation tmp= $1.charset_collation_attrs();
auto tmp= $1.charset_collation_attrs();
if (tmp.merge_charset_clause_and_collate_clause($3))
MYSQL_YYABORT;
Lex->last_field->set_charset_collation_attrs(tmp);
Expand Down Expand Up @@ -6079,7 +6080,7 @@ field_type_string:
| nchar opt_field_length opt_bin_mod
{
$$.set(&type_handler_string, $2,
Lex_charset_collation::national($3));
Lex_exact_charset_extended_collation_attrs::national($3));
}
| BINARY opt_field_length
{
Expand All @@ -6096,7 +6097,7 @@ field_type_string:
| nvarchar opt_field_length opt_compressed opt_bin_mod
{
$$.set(&type_handler_varchar, $2,
Lex_charset_collation::national($4));
Lex_exact_charset_extended_collation_attrs::national($4));
}
| VARBINARY opt_field_length opt_compressed
{
Expand Down
Loading

0 comments on commit e7f635e

Please sign in to comment.