Skip to content

Commit

Permalink
MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data
Browse files Browse the repository at this point in the history
Unexpected data truncation may occur when storing data to a compressed blob
column that uses a multi-byte, variable-length character set.

The reason was that an incorrect limit on the number of characters was
enforced for blobs.
  • Loading branch information
Sergey Vojtovich committed Apr 30, 2018
1 parent 9a84980 commit 68cbabb
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 34 deletions.
9 changes: 9 additions & 0 deletions mysql-test/main/column_compression.result
Original file line number Diff line number Diff line change
Expand Up @@ -1394,3 +1394,12 @@ SET column_compression_threshold=0;
INSERT INTO t1 VALUES('a');
SET column_compression_threshold=DEFAULT;
DROP TABLE t1;
#
# MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data
#
CREATE TABLE t1(a TINYTEXT COMPRESSED, b TINYTEXT) CHARACTER SET utf8;
INSERT INTO t1 VALUES (REPEAT(_latin1'a', 254), REPEAT(_latin1'a', 254));
SELECT CHAR_LENGTH(a), CHAR_LENGTH(b), LEFT(a, 10), LEFT(b, 10) FROM t1;
CHAR_LENGTH(a) CHAR_LENGTH(b) LEFT(a, 10) LEFT(b, 10)
254 254 aaaaaaaaaa aaaaaaaaaa
DROP TABLE t1;
8 changes: 8 additions & 0 deletions mysql-test/main/column_compression.test
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,11 @@ SET column_compression_threshold=0;
INSERT INTO t1 VALUES('a');
SET column_compression_threshold=DEFAULT;
DROP TABLE t1;

--echo #
--echo # MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data
--echo #
CREATE TABLE t1(a TINYTEXT COMPRESSED, b TINYTEXT) CHARACTER SET utf8;
INSERT INTO t1 VALUES (REPEAT(_latin1'a', 254), REPEAT(_latin1'a', 254));
SELECT CHAR_LENGTH(a), CHAR_LENGTH(b), LEFT(a, 10), LEFT(b, 10) FROM t1;
DROP TABLE t1;
9 changes: 9 additions & 0 deletions mysql-test/main/column_compression_utf16.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#
# MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data
#
CREATE TABLE t1(a TINYTEXT COMPRESSED, b TINYTEXT) CHARACTER SET utf16;
INSERT INTO t1 VALUES (REPEAT(_latin1'a', 127), REPEAT(_latin1'a', 127));
SELECT CHAR_LENGTH(a), CHAR_LENGTH(b), LEFT(a, 10), LEFT(b, 10) FROM t1;
CHAR_LENGTH(a) CHAR_LENGTH(b) LEFT(a, 10) LEFT(b, 10)
127 127 aaaaaaaaaa aaaaaaaaaa
DROP TABLE t1;
9 changes: 9 additions & 0 deletions mysql-test/main/column_compression_utf16.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
--source include/have_utf16.inc

--echo #
--echo # MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data
--echo #
CREATE TABLE t1(a TINYTEXT COMPRESSED, b TINYTEXT) CHARACTER SET utf16;
INSERT INTO t1 VALUES (REPEAT(_latin1'a', 127), REPEAT(_latin1'a', 127));
SELECT CHAR_LENGTH(a), CHAR_LENGTH(b), LEFT(a, 10), LEFT(b, 10) FROM t1;
DROP TABLE t1;
61 changes: 28 additions & 33 deletions sql/field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6968,23 +6968,23 @@ int Field_string::store(const char *from, size_t length,CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint copy_length;
String_copier copier;
int rc;

/* See the comment for Field_long::store(long long) */
DBUG_ASSERT(!table || table->in_use == current_thd);

copy_length= copier.well_formed_copy(field_charset,
(char*) ptr, field_length,
cs, from, length,
field_length / field_charset->mbmaxlen);
rc= well_formed_copy_with_check((char*) ptr, field_length,
cs, from, length,
field_length / field_charset->mbmaxlen,
false, &copy_length);

/* Append spaces if the string was shorter than the field. */
if (copy_length < field_length)
field_charset->cset->fill(field_charset,(char*) ptr+copy_length,
field_length-copy_length,
field_charset->pad_char);

return check_conversion_status(&copier, from + length, cs, false);
return rc;
}


Expand Down Expand Up @@ -7517,19 +7517,16 @@ int Field_varstring::store(const char *from,size_t length,CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint copy_length;
String_copier copier;
int rc;

copy_length= copier.well_formed_copy(field_charset,
(char*) ptr + length_bytes,
field_length,
cs, from, length,
field_length / field_charset->mbmaxlen);
if (length_bytes == 1)
*ptr= (uchar) copy_length;
else
int2store(ptr, copy_length);
rc= well_formed_copy_with_check((char*) get_data(), field_length,
cs, from, length,
field_length / field_charset->mbmaxlen,
true, &copy_length);

return check_conversion_status(&copier, from + length, cs, true);
store_length(copy_length);

return rc;
}


Expand Down Expand Up @@ -7952,7 +7949,7 @@ void Field_varstring::hash(ulong *nr, ulong *nr2)

int Field_longstr::compress(char *to, uint *to_length,
const char *from, uint length,
CHARSET_INFO *cs)
CHARSET_INFO *cs, size_t nchars)
{
THD *thd= get_thd();
char *buf= 0;
Expand All @@ -7961,19 +7958,14 @@ int Field_longstr::compress(char *to, uint *to_length,
if (String::needs_conversion_on_storage(length, cs, field_charset) ||
*to_length <= length)
{
String_copier copier;
const char *end= from + length;

if (!(buf= (char*) my_malloc(*to_length - 1, MYF(MY_WME))))
{
*to_length= 0;
return -1;
}

length= copier.well_formed_copy(field_charset, buf, *to_length - 1,
cs, from, length,
(*to_length - 1) / field_charset->mbmaxlen);
rc= check_conversion_status(&copier, end, cs, true);
rc= well_formed_copy_with_check(buf, *to_length - 1, cs, from, length,
nchars, true, &length);
from= buf;
}

Expand Down Expand Up @@ -8045,7 +8037,8 @@ int Field_varstring_compressed::store(const char *from, size_t length,
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint to_length= (uint)MY_MIN(field_length, field_charset->mbmaxlen * length + 1);
int rc= compress((char*) get_data(), &to_length, from, (uint)length, cs);
int rc= compress((char*) get_data(), &to_length, from, (uint) length, cs,
(to_length - 1) / field_charset->mbmaxlen);
store_length(to_length);
return rc;
}
Expand Down Expand Up @@ -8174,10 +8167,11 @@ int Field_blob::store(const char *from,size_t length,CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
size_t copy_length, new_length;
String_copier copier;
uint copy_len;
char *tmp;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
int rc;

if (!length)
{
Expand Down Expand Up @@ -8244,13 +8238,13 @@ int Field_blob::store(const char *from,size_t length,CHARSET_INFO *cs)
bmove(ptr + packlength, (uchar*) &tmp, sizeof(char*));
return 0;
}
copy_length= copier.well_formed_copy(field_charset,
(char*) value.ptr(), (uint)new_length,
cs, from, length);
Field_blob::store_length(copy_length);
rc= well_formed_copy_with_check((char*) value.ptr(), (uint) new_length,
cs, from, length,
length, true, &copy_len);
Field_blob::store_length(copy_len);
bmove(ptr+packlength,(uchar*) &tmp,sizeof(char*));

return check_conversion_status(&copier, from + length, cs, true);
return rc;

oom_error:
/* Fatal OOM error */
Expand Down Expand Up @@ -8664,7 +8658,8 @@ int Field_blob_compressed::store(const char *from, size_t length,
if (value.alloc(to_length))
goto oom;

rc= compress((char*) value.ptr(), &to_length, tmp.ptr(), (uint) length, cs);
rc= compress((char*) value.ptr(), &to_length, tmp.ptr(), (uint) length, cs,
(uint) length);
set_ptr(to_length, (uchar*) value.ptr());
return rc;

Expand Down
16 changes: 15 additions & 1 deletion sql/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -1772,13 +1772,27 @@ class Field_longstr :public Field_str
return report_if_important_data(copier->source_end_pos(),
end, count_spaces);
}
/*
  Copy a string into a destination buffer and report the conversion status.

  Runs String_copier::well_formed_copy() with field_charset as the
  destination character set, then passes the copier to
  check_conversion_status() so that callers do not have to repeat the
  copy + check sequence themselves.

  @param to            destination buffer
  @param to_length     size of the destination buffer, as given to
                       well_formed_copy()
  @param from_cs       character set of the source string
  @param from          source string
  @param from_length   length of the source string
  @param nchars        limit on the number of characters, forwarded to
                       well_formed_copy()
  @param count_spaces  forwarded unchanged to check_conversion_status()
  @param copy_length   [out] receives the value returned by
                       well_formed_copy()

  @return the value returned by check_conversion_status()
*/
int well_formed_copy_with_check(char *to, size_t to_length,
                                CHARSET_INFO *from_cs,
                                const char *from, size_t from_length,
                                size_t nchars, bool count_spaces,
                                uint *copy_length)
{
  String_copier copier;
  /* End of the source data; the status check is given this position. */
  const char *from_end= from + from_length;

  *copy_length= copier.well_formed_copy(field_charset, to, to_length,
                                        from_cs, from, from_length, nchars);

  return check_conversion_status(&copier, from_end, from_cs, count_spaces);
}
bool cmp_to_string_with_same_collation(const Item_bool_func *cond,
const Item *item) const;
bool cmp_to_string_with_stricter_collation(const Item_bool_func *cond,
const Item *item) const;
int compress(char *to, uint *to_length,
const char *from, uint length,
CHARSET_INFO *cs);
CHARSET_INFO *cs, size_t nchars);
String *uncompress(String *val_buffer, String *val_ptr,
const uchar *from, uint from_length);
public:
Expand Down

0 comments on commit 68cbabb

Please sign in to comment.