Skip to content

Commit

Permalink
- MDEV-6695 Bad column name for UCS2 string literals
Browse files Browse the repository at this point in the history
  The Item_string constructors called set_name() on the source string,
  which was wrong because in case of UCS2/UTF16/UTF32 the source value
  might not be a well-formed string (e.g. have an incomplete leftmost character).
  Now set_name() is called on str_value after it is copied
  (optionally with left zero padding) from the source string.
- MDEV-6694 Illegal mix of collation with a PS parameter
  Item_param::convert_str_value() did not set repertoire.
  Introducing a new structure MY_STRING_METADATA to collect
  character length and repertoire of a string in a single loop,
  to avoid two separate loops. Adding a new class Item_basic_value::Metadata
  as a convenience wrapper around MY_STRING_METADATA, to reuse the
  code between Item_string and Item_param.
  • Loading branch information
Alexander Barkov committed Sep 4, 2014
1 parent bf4347e commit 9392d0e
Show file tree
Hide file tree
Showing 10 changed files with 255 additions and 122 deletions.
8 changes: 8 additions & 0 deletions include/m_ctype.h
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,14 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len);
my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);


/*
  Metadata of a string: its character length and its repertoire,
  kept together so both can be collected for a string at once.
*/
typedef struct
{
/* Length of the string in characters. */
size_t char_length;
/* Repertoire of the string. */
uint repertoire;
} MY_STRING_METADATA;

void my_string_metadata_get(MY_STRING_METADATA *metadata,
CHARSET_INFO *cs, const char *str, size_t len);
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
Expand Down
7 changes: 7 additions & 0 deletions mysql-test/r/ctype_ucs.result
Original file line number Diff line number Diff line change
Expand Up @@ -5333,5 +5333,12 @@ SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
PI
pi=3.141593
#
# MDEV-6695 Bad column name for UCS2 string literals
#
SET NAMES utf8, character_set_connection=ucs2;
SELECT 'a','aa';
a aa
a aa
#
# End of 10.0 tests
#
23 changes: 23 additions & 0 deletions mysql-test/r/ctype_utf8.result
Original file line number Diff line number Diff line change
Expand Up @@ -6008,5 +6008,28 @@ CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001'))
aa
DROP TABLE t1;
#
# MDEV-6694 Illegal mix of collation with a PS parameter
#
SET NAMES utf8;
CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES (1,'a');
SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
CONCAT(b,IF(a,'b','b'))
ab
PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
SET @b='b';
EXECUTE stmt USING @b,@b;
CONCAT(b,IF(a,?,?))
ab
SET @b='';
EXECUTE stmt USING @b,@b;
CONCAT(b,IF(a,?,?))
a
SET @b='я';
EXECUTE stmt USING @b,@b;
ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
#
# End of 10.0 tests
#
7 changes: 7 additions & 0 deletions mysql-test/t/ctype_ucs.test
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,13 @@ DROP TABLE t1;
--echo #
SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;

--echo #
--echo # MDEV-6695 Bad column name for UCS2 string literals
--echo #
SET NAMES utf8, character_set_connection=ucs2;
SELECT 'a','aa';


--echo #
--echo # End of 10.0 tests
--echo #
18 changes: 18 additions & 0 deletions mysql-test/t/ctype_utf8.test
Original file line number Diff line number Diff line change
Expand Up @@ -1719,6 +1719,24 @@ SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 X'61')) FROM t1;
SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) FROM t1;
DROP TABLE t1;

--echo #
--echo # MDEV-6694 Illegal mix of collation with a PS parameter
--echo #
SET NAMES utf8;
CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES (1,'a');
SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
SET @b='b';
EXECUTE stmt USING @b,@b;
SET @b='';
EXECUTE stmt USING @b,@b;
SET @b='я';
--error ER_CANT_AGGREGATE_2COLLATIONS
EXECUTE stmt USING @b,@b;
DEALLOCATE PREPARE stmt;
DROP TABLE t1;


--echo #
--echo # End of 10.0 tests
Expand Down
75 changes: 29 additions & 46 deletions sql/item.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1073,10 +1073,14 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
name_length= 0;
return;
}
if (cs->ctype)
{
const char *str_start= str;

const char *str_start= str;
if (!cs->ctype || cs->mbminlen > 1)
{
str+= cs->cset->scan(cs, str, str + length, MY_SEQ_SPACES);
}
else
{
/*
This will probably need a better implementation in the future:
a function in CHARSET_INFO structure.
Expand All @@ -1086,21 +1090,21 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
length--;
str++;
}
if (str != str_start && !is_autogenerated_name)
{
char buff[SAFE_NAME_LEN];
strmake(buff, str_start,
MY_MIN(sizeof(buff)-1, length + (int) (str-str_start)));

if (length == 0)
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
ER_NAME_BECOMES_EMPTY, ER(ER_NAME_BECOMES_EMPTY),
buff);
else
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES),
buff);
}
}
if (str != str_start && !is_autogenerated_name)
{
char buff[SAFE_NAME_LEN];
strmake(buff, str_start,
MY_MIN(sizeof(buff)-1, length + (int) (str-str_start)));

if (length == 0)
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
ER_NAME_BECOMES_EMPTY, ER(ER_NAME_BECOMES_EMPTY),
buff);
else
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES),
buff);
}
if (!my_charset_same(cs, system_charset_info))
{
Expand Down Expand Up @@ -1269,27 +1273,11 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
SET @@arg= 1;
EXECUTE stms USING @arg;
result_type is STRING_RESULT at prepare time,
In the above example result_type is STRING_RESULT at prepare time,
and INT_RESULT at execution time.
*/
if (const_item())
{
if (state == NULL_VALUE)
return this;
uint cnv_errors;
String *ostr= val_str(&cnvstr);
if (!needs_charset_converter(tocs))
return this;
cnvitem->copy_value(ostr->ptr(), ostr->length(),
ostr->charset(), tocs, &cnv_errors);
if (cnv_errors)
return NULL;
if (ostr->charset() == &my_charset_bin && tocs != &my_charset_bin &&
!cnvitem->check_well_formed_result(true))
return NULL;
return cnvitem;
}
return this;
return !const_item() || state == NULL_VALUE ?
this : const_charset_converter(tocs, true);
}


Expand Down Expand Up @@ -3175,8 +3163,6 @@ Item_param::Item_param(uint pos_in_query_arg) :
value is set.
*/
maybe_null= 1;
cnvitem= new Item_string("", 0, &my_charset_bin, DERIVATION_COERCIBLE);
cnvstr.set(cnvbuf, sizeof(cnvbuf), &my_charset_bin);
}


Expand Down Expand Up @@ -3736,18 +3722,14 @@ bool Item_param::convert_str_value(THD *thd)
str_value.set_charset(value.cs_info.final_character_set_of_str_value);
/* Here str_value is guaranteed to be in final_character_set_of_str_value */

max_length= str_value.numchars() * str_value.charset()->mbmaxlen;

/* For the strings converted to numeric form within some functions */
decimals= NOT_FIXED_DEC;
/*
str_value_ptr is returned from val_str(). It must not be alloced,
to prevent its modification by the val_str() invoker.
*/
str_value_ptr.set(str_value.ptr(), str_value.length(),
str_value.charset());
/* Synchronize item charset with value charset */
collation.set(str_value.charset(), DERIVATION_COERCIBLE);
/* Synchronize item charset and length with value charset */
fix_charset_and_length_from_str_value(DERIVATION_COERCIBLE);
}
return rc;
}
Expand Down Expand Up @@ -3777,7 +3759,8 @@ Item_param::clone_item()
case STRING_VALUE:
case LONG_DATA_VALUE:
return new Item_string(name, str_value.c_ptr_quick(), str_value.length(),
str_value.charset());
str_value.charset(),
collation.derivation, collation.repertoire);
case TIME_VALUE:
break;
case NO_VALUE:
Expand Down
Loading

0 comments on commit 9392d0e

Please sign in to comment.