Skip to content

Commit

Permalink
MDEV-19522 InnoDB commit fails when FTS_DOC_ID value is greater than …
Browse files Browse the repository at this point in the history
…4294967295

InnoDB commit fails when a consecutive FTS_DOC_ID value
is greater than 4294967295.
The fix is to remove InnoDB's limitation on the FTS_DOC_ID
delta value, to make FTS encode 8-byte values, and to
remove the FTS_DOC_ID_MAX_STEP constant. The file
fts0vlc.ic is replaced with fts0vlc.h.

fts_encode_int(): Should be able to encode a value into up to 10 bytes

fts_get_encoded_len(): Should return the encoded length of a value,
which can be up to 10 bytes

fts_decode_vlc(): Add a debug assertion to verify that the maximum
allowed encoded length is 10 bytes.

mach_read_uint64_little_endian(): Reads a 64-bit value stored in
little-endian format

Added a unit test case which checks the FTS encoding of the
minimum and maximum values
  • Loading branch information
Thirunarayanan authored and dr-m committed Oct 21, 2021
1 parent 6b4fad9 commit 8ce8c26
Show file tree
Hide file tree
Showing 18 changed files with 286 additions and 235 deletions.
2 changes: 0 additions & 2 deletions mysql-test/suite/innodb_fts/r/basic.result
Original file line number Diff line number Diff line change
Expand Up @@ -313,9 +313,7 @@ FTS_DOC_ID
65536
131071
drop table t1;
call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. Its difference with largest used Doc ID 0 cannot exceed or equal to 65535");
CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000);
ERROR HY000: Invalid InnoDB FTS Doc ID
DROP TABLE t1;
21 changes: 21 additions & 0 deletions mysql-test/suite/innodb_fts/r/innodb_fts_misc_1.result
Original file line number Diff line number Diff line change
Expand Up @@ -972,3 +972,24 @@ SELECT * FROM information_schema.innodb_ft_deleted;
DOC_ID
DROP TABLE t1;
SET GLOBAL innodb_ft_aux_table=DEFAULT;
#
# MDEV-19522 InnoDB commit fails when FTS_DOC_ID value
# is greater than 4294967295
#
CREATE TABLE t1(
FTS_DOC_ID BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
f1 TEXT, f2 TEXT, PRIMARY KEY (FTS_DOC_ID),
FULLTEXT KEY (f1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1,'txt','bbb');
UPDATE t1 SET FTS_DOC_ID = 4294967298;
SELECT * FROM t1 WHERE match(f1) against("txt");
FTS_DOC_ID f1 f2
4294967298 txt bbb
SET @@session.insert_id = 100000000000;
INSERT INTO t1(f1, f2) VALUES ('aaa', 'bbb');
CREATE FULLTEXT INDEX i ON t1 (f2);
SELECT * FROM t1 WHERE match(f2) against("bbb");
FTS_DOC_ID f1 f2
4294967298 txt bbb
100000000000 aaa bbb
DROP TABLE t1;
2 changes: 0 additions & 2 deletions mysql-test/suite/innodb_fts/t/basic.test
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,7 @@ insert into t1(f1, f2) values(3, "This is the third record");
select FTS_DOC_ID from t1;
drop table t1;

call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. Its difference with largest used Doc ID 0 cannot exceed or equal to 65535");
CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB;
--error 182
INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000);
DROP TABLE t1;
18 changes: 18 additions & 0 deletions mysql-test/suite/innodb_fts/t/innodb_fts_misc_1.test
Original file line number Diff line number Diff line change
Expand Up @@ -942,3 +942,21 @@ SET GLOBAL innodb_ft_aux_table='test/t1';
SELECT * FROM information_schema.innodb_ft_deleted;
DROP TABLE t1;
SET GLOBAL innodb_ft_aux_table=DEFAULT;

--echo #
--echo # MDEV-19522 InnoDB commit fails when FTS_DOC_ID value
--echo # is greater than 4294967295
--echo #
CREATE TABLE t1(
FTS_DOC_ID BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
f1 TEXT, f2 TEXT, PRIMARY KEY (FTS_DOC_ID),
FULLTEXT KEY (f1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1,'txt','bbb');
UPDATE t1 SET FTS_DOC_ID = 4294967298;
SELECT * FROM t1 WHERE match(f1) against("txt");
SET @@session.insert_id = 100000000000;
INSERT INTO t1(f1, f2) VALUES ('aaa', 'bbb');
CREATE FULLTEXT INDEX i ON t1 (f2);
SELECT * FROM t1 WHERE match(f2) against("bbb");
# Cleanup
DROP TABLE t1;
4 changes: 4 additions & 0 deletions storage/innobase/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,7 @@ IF(MSVC)
ENDIF()

ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup)

IF(WITH_UNIT_TESTS)
ADD_SUBDIRECTORY(unittest)
ENDIF()
10 changes: 5 additions & 5 deletions storage/innobase/fts/fts0fts.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Full Text Search interface
#include "fts0priv.h"
#include "fts0types.h"
#include "fts0types.ic"
#include "fts0vlc.ic"
#include "fts0vlc.h"
#include "fts0plugin.h"
#include "dict0priv.h"
#include "dict0stats.h"
Expand Down Expand Up @@ -1247,7 +1247,7 @@ fts_cache_node_add_positions(
ulint enc_len;
ulint last_pos;
byte* ptr_start;
ulint doc_id_delta;
doc_id_t doc_id_delta;

#ifdef UNIV_DEBUG
if (cache) {
Expand All @@ -1258,7 +1258,7 @@ fts_cache_node_add_positions(
ut_ad(doc_id >= node->last_doc_id);

/* Calculate the space required to store the ilist. */
doc_id_delta = (ulint)(doc_id - node->last_doc_id);
doc_id_delta = doc_id - node->last_doc_id;
enc_len = fts_get_encoded_len(doc_id_delta);

last_pos = 0;
Expand Down Expand Up @@ -1307,14 +1307,14 @@ fts_cache_node_add_positions(
ptr_start = ptr;

/* Encode the new fragment. */
ptr += fts_encode_int(doc_id_delta, ptr);
ptr = fts_encode_int(doc_id_delta, ptr);

last_pos = 0;
for (i = 0; i < ib_vector_size(positions); i++) {
ulint pos = *(static_cast<ulint*>(
ib_vector_get(positions, i)));

ptr += fts_encode_int(pos - last_pos, ptr);
ptr = fts_encode_int(pos - last_pos, ptr);
last_pos = pos;
}

Expand Down
10 changes: 6 additions & 4 deletions storage/innobase/fts/fts0opt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#include "ut0list.h"
#include "zlib.h"
#include "fts0opt.h"
#include "fts0vlc.h"

/** The FTS optimize thread's work queue. */
ib_wqueue_t* fts_optimize_wq;
Expand Down Expand Up @@ -1116,7 +1117,7 @@ fts_optimize_encode_node(
ulint pos_enc_len;
doc_id_t doc_id_delta;
dberr_t error = DB_SUCCESS;
byte* src = enc->src_ilist_ptr;
const byte* src = enc->src_ilist_ptr;

if (node->first_doc_id == 0) {
ut_a(node->last_doc_id == 0);
Expand Down Expand Up @@ -1173,7 +1174,7 @@ fts_optimize_encode_node(

/* Encode the doc id. Cast to ulint, the delta should be small and
therefore no loss of precision. */
dst += fts_encode_int((ulint) doc_id_delta, dst);
dst = fts_encode_int(doc_id_delta, dst);

/* Copy the encoded pos array. */
memcpy(dst, src, pos_enc_len);
Expand Down Expand Up @@ -1220,7 +1221,8 @@ fts_optimize_node(
doc_id_t delta;
doc_id_t del_doc_id = FTS_NULL_DOC_ID;

delta = fts_decode_vlc(&enc->src_ilist_ptr);
delta = fts_decode_vlc(
(const byte**)&enc->src_ilist_ptr);

test_again:
/* Check whether the doc id is in the delete list, if
Expand Down Expand Up @@ -1248,7 +1250,7 @@ fts_optimize_node(

/* Skip the entries for this document. */
while (*enc->src_ilist_ptr) {
fts_decode_vlc(&enc->src_ilist_ptr);
fts_decode_vlc((const byte**)&enc->src_ilist_ptr);
}

/* Skip the end of word position marker. */
Expand Down
7 changes: 4 additions & 3 deletions storage/innobase/fts/fts0que.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#include "fts0pars.h"
#include "fts0types.h"
#include "fts0plugin.h"
#include "fts0vlc.h"

#include <iomanip>
#include <vector>
Expand Down Expand Up @@ -3224,7 +3225,7 @@ fts_query_filter_doc_ids(
ulint len, /*!< in: doc id ilist size */
ibool calc_doc_count) /*!< in: whether to remember doc count */
{
byte* ptr = static_cast<byte*>(data);
const byte* ptr = static_cast<byte*>(data);
doc_id_t doc_id = 0;
ulint decoded = 0;
ib_rbt_t* doc_freqs = word_freq->doc_freqs;
Expand All @@ -3234,8 +3235,8 @@ fts_query_filter_doc_ids(
ulint freq = 0;
fts_doc_freq_t* doc_freq;
fts_match_t* match = NULL;
ulint last_pos = 0;
ulint pos = fts_decode_vlc(&ptr);
doc_id_t last_pos = 0;
doc_id_t pos = fts_decode_vlc(&ptr);

/* Some sanity checks. */
if (doc_id == 0) {
Expand Down
13 changes: 1 addition & 12 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8543,8 +8543,7 @@ calc_row_difference(
&& prebuilt->table->fts
&& innobase_strcasecmp(
field->field_name, FTS_DOC_ID_COL_NAME) == 0) {
doc_id = (doc_id_t) mach_read_from_n_little_endian(
n_ptr, 8);
doc_id = mach_read_uint64_little_endian(n_ptr);
if (doc_id == 0) {
return(DB_FTS_INVALID_DOCID);
}
Expand Down Expand Up @@ -8787,16 +8786,6 @@ calc_row_difference(
<< innodb_table->name;

return(DB_FTS_INVALID_DOCID);
} else if ((doc_id
- prebuilt->table->fts->cache->next_doc_id)
>= FTS_DOC_ID_MAX_STEP) {

ib::warn() << "Doc ID " << doc_id << " is too"
" big. Its difference with largest"
" Doc ID used " << prebuilt->table->fts
->cache->next_doc_id - 1
<< " cannot exceed or equal to "
<< FTS_DOC_ID_MAX_STEP;
}


Expand Down
18 changes: 7 additions & 11 deletions storage/innobase/handler/i_s.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits)
#include "fil0fil.h"
#include "fil0crypt.h"
#include "dict0crea.h"
#include "fts0vlc.h"

/** The latest successfully looked up innodb_fts_aux_table */
UNIV_INTERN table_id_t innodb_ft_aux_table_id;
Expand Down Expand Up @@ -2775,7 +2776,7 @@ i_s_fts_index_cache_fill_one_index(
/* Decrypt the ilist, and display Dod ID and word position */
for (ulint i = 0; i < ib_vector_size(word->nodes); i++) {
fts_node_t* node;
byte* ptr;
const byte* ptr;
ulint decoded = 0;
doc_id_t doc_id = 0;

Expand All @@ -2785,13 +2786,11 @@ i_s_fts_index_cache_fill_one_index(
ptr = node->ilist;

while (decoded < node->ilist_size) {
ulint pos = fts_decode_vlc(&ptr);

doc_id += pos;
doc_id += fts_decode_vlc(&ptr);

/* Get position info */
while (*ptr) {
pos = fts_decode_vlc(&ptr);

OK(field_store_string(
fields[I_S_FTS_WORD],
Expand All @@ -2812,7 +2811,7 @@ i_s_fts_index_cache_fill_one_index(
doc_id, true));

OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
pos, true));
fts_decode_vlc(&ptr), true));

OK(schema_table_store_record(
thd, table));
Expand Down Expand Up @@ -3146,7 +3145,7 @@ i_s_fts_index_table_fill_one_fetch(
/* Decrypt the ilist, and display Dod ID and word position */
for (ulint i = 0; i < ib_vector_size(word->nodes); i++) {
fts_node_t* node;
byte* ptr;
const byte* ptr;
ulint decoded = 0;
doc_id_t doc_id = 0;

Expand All @@ -3156,13 +3155,10 @@ i_s_fts_index_table_fill_one_fetch(
ptr = node->ilist;

while (decoded < node->ilist_size) {
ulint pos = fts_decode_vlc(&ptr);

doc_id += pos;
doc_id += fts_decode_vlc(&ptr);

/* Get position info */
while (*ptr) {
pos = fts_decode_vlc(&ptr);

OK(field_store_string(
fields[I_S_FTS_WORD],
Expand All @@ -3181,7 +3177,7 @@ i_s_fts_index_table_fill_one_fetch(
longlong(doc_id), true));

OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
pos, true));
fts_decode_vlc(&ptr), true));

OK(schema_table_store_record(
thd, table));
Expand Down
4 changes: 0 additions & 4 deletions storage/innobase/include/fts0fts.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,6 @@ those defined in mysql file ft_global.h */
/** Threshold where our optimize thread automatically kicks in */
#define FTS_OPTIMIZE_THRESHOLD 10000000

/** Threshold to avoid exhausting of doc ids. Consecutive doc id difference
should not exceed FTS_DOC_ID_MAX_STEP */
#define FTS_DOC_ID_MAX_STEP 65535

/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */
#define FTS_MAX_WORD_LEN (HA_FT_MAXCHARLEN * 4)

Expand Down
33 changes: 0 additions & 33 deletions storage/innobase/include/fts0types.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,16 +314,6 @@ int fts_doc_id_cmp(
const void* p1, /*!< in: id1 */
const void* p2); /*!< in: id2 */

/******************************************************************//**
Decode and return the integer that was encoded using our VLC scheme.*/
UNIV_INLINE
ulint
fts_decode_vlc(
/*===========*/
/*!< out: value decoded */
byte** ptr); /*!< in: ptr to decode from, this ptr is
incremented by the number of bytes decoded */

/******************************************************************//**
Duplicate a string. */
UNIV_INLINE
Expand All @@ -338,28 +328,6 @@ fts_string_dup(
const fts_string_t* src, /*!< in: src string */
mem_heap_t* heap); /*!< in: heap to use */

/******************************************************************//**
Return length of val if it were encoded using our VLC scheme. */
UNIV_INLINE
ulint
fts_get_encoded_len(
/*================*/
/*!< out: length of value
encoded, in bytes */
ulint val); /*!< in: value to encode */

/******************************************************************//**
Encode an integer using our VLC scheme and return the length in bytes. */
UNIV_INLINE
ulint
fts_encode_int(
/*===========*/
/*!< out: length of value
encoded, in bytes */
ulint val, /*!< in: value to encode */
byte* buf); /*!< in: buffer, must have
enough space */

/******************************************************************//**
Get the selected FTS aux INDEX suffix. */
UNIV_INLINE
Expand All @@ -381,6 +349,5 @@ fts_select_index(
ulint len);

#include "fts0types.ic"
#include "fts0vlc.ic"

#endif /* INNOBASE_FTS0TYPES_H */
Loading

0 comments on commit 8ce8c26

Please sign in to comment.