Skip to content

Commit

Permalink
MDEV-15592 - Column COMPRESSED should select a 'high order' datatype
Browse files Browse the repository at this point in the history
Compressed blob columns didn't accept data at their capacity. E.g. storing
255 bytes to TINYBLOB resulted in a "Data too long" error.

Now it is allowed, assuming the compression method was able to produce a
shorter string (so that both metadata and compressed data fit the blob) and
column_compression_threshold is lower than blob.

If no compression was performed, we still have to reserve an additional byte
for metadata, and thus we perform normal data truncation and return its
status.
  • Loading branch information
Sergey Vojtovich committed May 11, 2018
1 parent 8ad12b6 commit c982924
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 29 deletions.
59 changes: 56 additions & 3 deletions mysql-test/main/column_compression.result
Original file line number Diff line number Diff line change
Expand Up @@ -1336,15 +1336,33 @@ a LENGTH(a)
DROP TABLE t1;
CREATE TABLE t1(a TINYTEXT COMPRESSED);
SET column_compression_threshold=300;
INSERT INTO t1 VALUES(REPEAT('a', 254));
INSERT INTO t1 VALUES(REPEAT(' ', 254));
INSERT INTO t1 VALUES(REPEAT('a', 255));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 255));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT('a', 256));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 256));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
Note 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT('a', 257));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 257));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
Note 1265 Data truncated for column 'a' at row 1
SET column_compression_threshold=DEFAULT;
SELECT a, LENGTH(a) FROM t1;
a LENGTH(a)
254
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1;
LEFT(a, 10) LENGTH(a)
254
254
254
254
aaaaaaaaaa 254
DROP TABLE t1;
# Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length
CREATE TABLE t1(a VARCHAR(255) COMPRESSED);
Expand All @@ -1360,6 +1378,32 @@ SELECT a, LENGTH(a) FROM t1;
a LENGTH(a)
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 255
DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(65531) COMPRESSED);
SET column_compression_threshold=65537;
INSERT INTO t1 VALUES(REPEAT('a', 65530));
INSERT INTO t1 VALUES(REPEAT(' ', 65530));
INSERT INTO t1 VALUES(REPEAT('a', 65531));
INSERT INTO t1 VALUES(REPEAT(' ', 65531));
INSERT INTO t1 VALUES(REPEAT('a', 65532));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 65532));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT('a', 65533));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 65533));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
SET column_compression_threshold=DEFAULT;
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1, 2;
LEFT(a, 10) LENGTH(a)
65530
65531
65531
65531
aaaaaaaaaa 65530
aaaaaaaaaa 65531
DROP TABLE t1;
#
# MDEV-14929 - AddressSanitizer: memcpy-param-overlap in
# Field_longstr::compress
Expand Down Expand Up @@ -1419,3 +1463,12 @@ COLUMN_NAME CHARACTER_MAXIMUM_LENGTH CHARACTER_OCTET_LENGTH
a 10 10
b 10 30
DROP TABLE t1;
#
# MDEV-15592 - Column COMPRESSED should select a 'high order' datatype
#
CREATE TABLE t1(a TINYTEXT COMPRESSED);
INSERT INTO t1 VALUES(REPEAT('a', 255));
SELECT LEFT(a, 10), LENGTH(a) FROM t1;
LEFT(a, 10) LENGTH(a)
aaaaaaaaaa 255
DROP TABLE t1;
36 changes: 35 additions & 1 deletion mysql-test/main/column_compression.test
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,19 @@ DROP TABLE t1;

CREATE TABLE t1(a TINYTEXT COMPRESSED);
SET column_compression_threshold=300;
INSERT INTO t1 VALUES(REPEAT('a', 254));
INSERT INTO t1 VALUES(REPEAT(' ', 254));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 255));
INSERT INTO t1 VALUES(REPEAT(' ', 255));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 256));
INSERT INTO t1 VALUES(REPEAT(' ', 256));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 257));
INSERT INTO t1 VALUES(REPEAT(' ', 257));
SET column_compression_threshold=DEFAULT;
SELECT a, LENGTH(a) FROM t1;
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1;
DROP TABLE t1;

--echo # Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length
Expand All @@ -80,6 +88,22 @@ SET column_compression_threshold=DEFAULT;
SELECT a, LENGTH(a) FROM t1;
DROP TABLE t1;

CREATE TABLE t1(a VARCHAR(65531) COMPRESSED);
SET column_compression_threshold=65537;
INSERT INTO t1 VALUES(REPEAT('a', 65530));
INSERT INTO t1 VALUES(REPEAT(' ', 65530));
INSERT INTO t1 VALUES(REPEAT('a', 65531));
INSERT INTO t1 VALUES(REPEAT(' ', 65531));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 65532));
INSERT INTO t1 VALUES(REPEAT(' ', 65532));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 65533));
INSERT INTO t1 VALUES(REPEAT(' ', 65533));
SET column_compression_threshold=DEFAULT;
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1, 2;
DROP TABLE t1;


--echo #
--echo # MDEV-14929 - AddressSanitizer: memcpy-param-overlap in
Expand Down Expand Up @@ -113,6 +137,7 @@ INSERT INTO t1 VALUES('a');
SET column_compression_threshold=DEFAULT;
DROP TABLE t1;


--echo #
--echo # MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data
--echo #
Expand All @@ -136,3 +161,12 @@ FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1' AND COLUMN_NAME IN ('a','b')
ORDER BY COLUMN_NAME;
DROP TABLE t1;


--echo #
--echo # MDEV-15592 - Column COMPRESSED should select a 'high order' datatype
--echo #
CREATE TABLE t1(a TINYTEXT COMPRESSED);
INSERT INTO t1 VALUES(REPEAT('a', 255));
SELECT LEFT(a, 10), LENGTH(a) FROM t1;
DROP TABLE t1;
71 changes: 47 additions & 24 deletions sql/field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7920,10 +7920,13 @@ void Field_varstring::hash(ulong *nr, ulong *nr2)
Compress field
@param[out] to destination buffer for compressed data
@param[in,out] to_length in: size of to, out: compressed data length
@param[in] to_length size of to
@param[in] from data to compress
@param[in] length from length
@param[in] max_length truncate `from' to this length
@param[out] out_length compressed data length
@param[in] cs from character set
@param[in] nchars copy no more than "nchars" characters
In worst case (no compression performed) storage requirement is increased by
1 byte to store header. If it exceeds field length, normal data truncation is
Expand All @@ -7947,42 +7950,57 @@ void Field_varstring::hash(ulong *nr, ulong *nr2)
followed by compressed data.
*/

int Field_longstr::compress(char *to, uint *to_length,
int Field_longstr::compress(char *to, uint to_length,
const char *from, uint length,
uint max_length,
uint *out_length,
CHARSET_INFO *cs, size_t nchars)
{
THD *thd= get_thd();
char *buf= 0;
char *buf;
uint buf_length;
int rc= 0;

if (String::needs_conversion_on_storage(length, cs, field_charset) ||
*to_length <= length)
max_length < length)
{
if (!(buf= (char*) my_malloc(*to_length - 1, MYF(MY_WME))))
set_if_smaller(max_length, static_cast<ulonglong>(field_charset->mbmaxlen) * length + 1);
if (!(buf= (char*) my_malloc(max_length, MYF(MY_WME))))
{
*to_length= 0;
*out_length= 0;
return -1;
}

rc= well_formed_copy_with_check(buf, *to_length - 1, cs, from, length,
nchars, true, &length);
from= buf;
rc= well_formed_copy_with_check(buf, max_length, cs, from, length,
nchars, true, &buf_length);
}
else
{
buf= const_cast<char*>(from);
buf_length= length;
}

if (length == 0)
*to_length= 0;
else if (length >= thd->variables.column_compression_threshold &&
(*to_length= compression_method()->compress(thd, to, from, length)))
if (buf_length == 0)
*out_length= 0;
else if (buf_length >= thd->variables.column_compression_threshold &&
(*out_length= compression_method()->compress(thd, to, buf, buf_length)))
status_var_increment(thd->status_var.column_compressions);
else
{
/* Store uncompressed */
to[0]= 0;
memcpy(to + 1, from, length);
*to_length= length + 1;
if (buf_length < to_length)
memcpy(to + 1, buf, buf_length);
else
{
/* Storing string at blob capacity, e.g. 255 bytes string to TINYBLOB. */
rc= well_formed_copy_with_check(to + 1, to_length - 1, cs, from, length,
nchars, true, &buf_length);
}
*out_length= buf_length + 1;
}

if (buf)
if (buf != from)
my_free(buf);
return rc;
}
Expand Down Expand Up @@ -8036,10 +8054,12 @@ int Field_varstring_compressed::store(const char *from, size_t length,
CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint to_length= (uint)MY_MIN(field_length, field_charset->mbmaxlen * length + 1);
int rc= compress((char*) get_data(), &to_length, from, (uint) length, cs,
(to_length - 1) / field_charset->mbmaxlen);
store_length(to_length);
uint compressed_length;
int rc= compress((char*) get_data(), field_length, from, (uint) length,
Field_varstring_compressed::max_display_length(),
&compressed_length, cs,
Field_varstring_compressed::char_length());
store_length(compressed_length);
return rc;
}

Expand Down Expand Up @@ -8648,7 +8668,10 @@ int Field_blob_compressed::store(const char *from, size_t length,
CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint to_length= (uint)MY_MIN(max_data_length(), field_charset->mbmaxlen * length + 1);
uint compressed_length;
uint max_length= max_data_length();
uint to_length= (uint) MY_MIN(max_length,
field_charset->mbmaxlen * length + 1);
String tmp(from, length, cs);
int rc;

Expand All @@ -8658,9 +8681,9 @@ int Field_blob_compressed::store(const char *from, size_t length,
if (value.alloc(to_length))
goto oom;

rc= compress((char*) value.ptr(), &to_length, tmp.ptr(), (uint) length, cs,
(uint) length);
set_ptr(to_length, (uchar*) value.ptr());
rc= compress((char*) value.ptr(), to_length, tmp.ptr(), (uint) length,
max_length, &compressed_length, cs, (uint) length);
set_ptr(compressed_length, (uchar*) value.ptr());
return rc;

oom:
Expand Down
4 changes: 3 additions & 1 deletion sql/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -1790,8 +1790,10 @@ class Field_longstr :public Field_str
const Item *item) const;
bool cmp_to_string_with_stricter_collation(const Item_bool_func *cond,
const Item *item) const;
int compress(char *to, uint *to_length,
int compress(char *to, uint to_length,
const char *from, uint length,
uint max_length,
uint *out_length,
CHARSET_INFO *cs, size_t nchars);
String *uncompress(String *val_buffer, String *val_ptr,
const uchar *from, uint from_length);
Expand Down

0 comments on commit c982924

Please sign in to comment.