Skip to content
Permalink
Browse files
MDEV-27927 row_sel_try_search_shortcut_for_mysql() does not latch a p…
…age, violating read view isolation

btr_search_guess_on_hash() would only acquire an index page latch if it
is invoked with ahi_latch=NULL. If it's invoked from
row_sel_try_search_shortcut_for_mysql() with ahi_latch!=NULL, a page
will not be latched, and row_search_mvcc() will get a pointer to the
record, which can be changed by some other transaction before the record
was stored in result buffer with row_sel_store_mysql_rec() call.

ahi_latch argument of btr_cur_search_to_nth_level_func() and
btr_pcur_open_with_no_init_func() is used only for
row_sel_try_search_shortcut_for_mysql().
btr_cur_search_to_nth_level_func(..., ahi_latch !=0, ...) is invoked
only from btr_pcur_open_with_no_init_func(..., ahi_latch !=0, ...),
which, in turns, is invoked only from
row_sel_try_search_shortcut_for_mysql().

I suppose that separate case with ahi_latch!=0 was intentionally
implemented to protect row_sel_store_mysql_rec() call in
row_search_mvcc() just after row_sel_try_search_shortcut_for_mysql()
call. After the ahi_latch was moved from row_seach_mvcc() to
row_sel_try_search_shortcut_for_mysql(), there is no need in it at all
if btr_search_guess_on_hash() latches a page unconditionally. And if
btr_search_guess_on_hash() latched the page, any access to the record in
row_sel_try_search_shortcut_for_mysql() after btr_pcur_open_with_no_init()
call will be protected with the page latch.

The fix is to remove ahi_latch argument from
btr_pcur_open_with_no_init_func(), btr_cur_search_to_nth_level_func()
and btr_search_guess_on_hash().

There will not be test, as to test it we need to freeze some SELECT
execution in the point between row_sel_try_search_shortcut_for_mysql()
and row_sel_store_mysql_rec() calls in row_search_mvcc(), and to change
the record in some other transaction to let row_sel_store_mysql_rec() to
store changed record in result buffer. Buf we can't do this with the
fix, as the page will be latched in btr_search_guess_on_hash() call.
  • Loading branch information
vlad-lesin committed Oct 5, 2022
1 parent 111cbdf commit c0eda62
Show file tree
Hide file tree
Showing 16 changed files with 160 additions and 267 deletions.
@@ -876,7 +876,7 @@ btr_page_get_father_node_ptr_func(

err = btr_cur_search_to_nth_level(
index, level + 1, tuple,
PAGE_CUR_LE, latch_mode, cursor, 0,
PAGE_CUR_LE, latch_mode, cursor,
file, line, mtr);

if (err != DB_SUCCESS) {
@@ -2380,7 +2380,7 @@ btr_insert_on_non_leaf_level_func(
dberr_t err = btr_cur_search_to_nth_level(
index, level, tuple, PAGE_CUR_LE,
BTR_CONT_MODIFY_TREE,
&cursor, 0, file, line, mtr);
&cursor, file, line, mtr);

if (err != DB_SUCCESS) {
ib::warn() << " Error code: " << err
@@ -2401,7 +2401,7 @@ btr_insert_on_non_leaf_level_func(
btr_cur_search_to_nth_level(index, level, tuple,
PAGE_CUR_RTREE_INSERT,
BTR_CONT_MODIFY_TREE,
&cursor, 0, file, line, mtr);
&cursor, file, line, mtr);
}

ut_ad(cursor.flag == BTR_CUR_BINARY);
@@ -1102,38 +1102,36 @@ If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
search tuple should be performed in the B-tree. InnoDB does an insert
immediately after the cursor. Thus, the cursor may end up on a user record,
or on a page infimum record. */
dberr_t
btr_cur_search_to_nth_level_func(
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: the tree level of search */
const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
tuple must be set so that it cannot get
compared to the node ptr page number field! */
page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
Inserts should always be made using
PAGE_CUR_LE to search the position! */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
at most one of BTR_INSERT, BTR_DELETE_MARK,
BTR_DELETE, or BTR_ESTIMATE;
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
NOTE that if ahi_latch, we might not have a
cursor page latch, we assume that ahi_latch
protects the record! */
btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
#ifdef BTR_CUR_HASH_ADAPT
rw_lock_t* ahi_latch,
/*!< in: currently held btr_search_latch
(in RW_S_LATCH mode), or NULL */
#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr, /*!< in: mtr */
ib_uint64_t autoinc)/*!< in: PAGE_ROOT_AUTO_INC to be written
(0 if none) */
or on a page infimum record.
@param index index
@param level the tree level of search
@param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that
it cannot get compared to the node ptr page number field!
@param mode PAGE_CUR_L, NOTE that if the search is made using a unique
prefix of a record, mode should be PAGE_CUR_LE, not
PAGE_CUR_GE, as the latter may end up on the previous page of
the record! Inserts should always be made using PAGE_CUR_LE
to search the position!
@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT,
BTR_DELETE_MARK, BTR_DELETE, or BTR_ESTIMATE;
cursor->left_block is used to store a pointer to the left
neighbor page, in the cases BTR_SEARCH_PREV and
BTR_MODIFY_PREV; NOTE that if ahi_latch, we might not have a
cursor page latch, we assume that ahi_latch protects the
record!
@param cursor tree cursor; the cursor page is s- or x-latched, but see also
above!
@param file file name
@param line line where called
@param mtr mini-transaction
@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none)
@return DB_SUCCESS on success or error code otherwise */
dberr_t btr_cur_search_to_nth_level(dict_index_t *index, ulint level,
const dtuple_t *tuple,
page_cur_mode_t mode, ulint latch_mode,
btr_cur_t *cursor, const char *file,
unsigned line, mtr_t *mtr,
ib_uint64_t autoinc)
{
page_t* page = NULL; /* remove warning */
buf_block_t* block;
@@ -1301,15 +1299,14 @@ btr_cur_search_to_nth_level_func(
&& mode != PAGE_CUR_LE_OR_EXTENDS
# endif /* PAGE_CUR_LE_OR_EXTENDS */
&& !dict_index_is_spatial(index)
/* If !ahi_latch, we do a dirty read of
/* We do a dirty read of
btr_search_enabled below, and btr_search_guess_on_hash()
will have to check it again. */
&& btr_search_enabled
&& !modify_external
&& !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)
&& btr_search_guess_on_hash(index, info, tuple, mode,
latch_mode, cursor,
ahi_latch, mtr)) {
latch_mode, cursor, mtr)) {

/* Search using the hash index succeeded */

@@ -1330,13 +1327,6 @@ btr_cur_search_to_nth_level_func(
/* If the hash search did not succeed, do binary search down the
tree */

#ifdef BTR_CUR_HASH_ADAPT
if (ahi_latch) {
/* Release possible search latch to obey latching order */
rw_lock_s_unlock(ahi_latch);
}
#endif /* BTR_CUR_HASH_ADAPT */

/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched leaf node(s) */

@@ -2397,12 +2387,6 @@ btr_cur_search_to_nth_level_func(
cursor->rtr_info->mbr_adj = true;
}

#ifdef BTR_CUR_HASH_ADAPT
if (ahi_latch) {
rw_lock_s_lock(ahi_latch);
}
#endif /* BTR_CUR_HASH_ADAPT */

DBUG_RETURN(err);
}

@@ -6232,8 +6216,7 @@ btr_estimate_n_rows_in_range_low(

btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
&cursor, 0,
__FILE__, __LINE__, &mtr);
&cursor, __FILE__, __LINE__, &mtr);

ut_ad(!page_rec_is_infimum(btr_cur_get_rec(&cursor)));

@@ -6286,8 +6269,7 @@ btr_estimate_n_rows_in_range_low(

btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
&cursor, 0,
__FILE__, __LINE__, &mtr);
&cursor, __FILE__, __LINE__, &mtr);

const rec_t* rec = btr_cur_get_rec(&cursor);

@@ -424,11 +424,7 @@ btr_pcur_t::restore_position(ulint restore_latch_mode, const char *file,
}

btr_pcur_open_with_no_init_func(index, tuple, mode, restore_latch_mode,
this,
#ifdef BTR_CUR_HASH_ADAPT
NULL,
#endif /* BTR_CUR_HASH_ADAPT */
file, line, mtr);
this, file, line, mtr);

/* Restore the old search mode */
search_mode = old_mode;
@@ -895,8 +895,6 @@ both have sensible values.
we assume the caller uses his search latch
to protect the record!
@param[out] cursor tree cursor
@param[in] ahi_latch the adaptive hash index latch being held,
or NULL
@param[in] mtr mini transaction
@return whether the search succeeded */
bool
@@ -907,7 +905,6 @@ btr_search_guess_on_hash(
ulint mode,
ulint latch_mode,
btr_cur_t* cursor,
rw_lock_t* ahi_latch,
mtr_t* mtr)
{
ulint fold;
@@ -916,16 +913,13 @@ btr_search_guess_on_hash(
btr_cur_t cursor2;
btr_pcur_t pcur;
#endif
ut_ad(!ahi_latch || rw_lock_own_flagged(
ahi_latch, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));

if (!btr_search_enabled) {
return false;
}

ut_ad(index && info && tuple && cursor && mtr);
ut_ad(!dict_index_is_ibuf(index));
ut_ad(!ahi_latch || ahi_latch == btr_get_search_latch(index));
ut_ad((latch_mode == BTR_SEARCH_LEAF)
|| (latch_mode == BTR_MODIFY_LEAF));

@@ -956,54 +950,41 @@ btr_search_guess_on_hash(
cursor->fold = fold;
cursor->flag = BTR_CUR_HASH;

rw_lock_t* use_latch = ahi_latch ? NULL : btr_get_search_latch(index);
rw_lock_t* ahi_latch = btr_get_search_latch(index);
const rec_t* rec;

if (use_latch) {
rw_lock_s_lock(use_latch);
rw_lock_s_lock(ahi_latch);

if (!btr_search_enabled) {
goto fail;
}
} else {
ut_ad(btr_search_enabled);
ut_ad(rw_lock_own(ahi_latch, RW_LOCK_S));
if (!btr_search_enabled) {
goto fail;
}

rec = static_cast<const rec_t*>(
ha_search_and_get_data(btr_get_search_table(index), fold));

if (!rec) {
if (use_latch) {
fail:
rw_lock_s_unlock(use_latch);
}
rw_lock_s_unlock(ahi_latch);

btr_search_failure(info, cursor);
return false;
}

buf_block_t* block = buf_block_from_ahi(rec);

if (use_latch) {
if (!buf_page_get_known_nowait(
latch_mode, block, BUF_MAKE_YOUNG,
__FILE__, __LINE__, mtr)) {
goto fail;
}
if (!buf_page_get_known_nowait(latch_mode, block, BUF_MAKE_YOUNG,
__FILE__, __LINE__, mtr)) {
goto fail;
}

const bool fail = index != block->index
&& index_id == block->index->id;
ut_a(!fail || block->index->freed());
rw_lock_s_unlock(use_latch);
const bool fail = index != block->index
&& index_id == block->index->id;
ut_a(!fail || block->index->freed());
ut_ad(fail || !block->page.file_page_was_freed);
rw_lock_s_unlock(ahi_latch);

buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
if (UNIV_UNLIKELY(fail)) {
goto fail_and_release_page;
}
} else if (UNIV_UNLIKELY(index != block->index
&& index_id == block->index->id)) {
ut_a(block->index->freed());
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
if (UNIV_UNLIKELY(fail)) {
goto fail_and_release_page;
}

@@ -1012,9 +993,7 @@ btr_search_guess_on_hash(
ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);

fail_and_release_page:
if (!ahi_latch) {
btr_leaf_page_release(block, latch_mode, mtr);
}
btr_leaf_page_release(block, latch_mode, mtr);

btr_search_failure(info, cursor);
return false;
@@ -1032,7 +1011,7 @@ btr_search_guess_on_hash(
record to determine if our guess for the cursor position is
right. */
if (index_id != btr_page_get_index_id(block->frame)
|| !btr_search_check_guess(cursor, !!ahi_latch, tuple, mode)) {
|| !btr_search_check_guess(cursor, 0, tuple, mode)) {
goto fail_and_release_page;
}

@@ -1081,7 +1060,7 @@ btr_search_guess_on_hash(
#ifdef UNIV_SEARCH_PERF_STAT
btr_search_n_succ++;
#endif
if (!ahi_latch && buf_page_peek_if_too_old(&block->page)) {
if (buf_page_peek_if_too_old(&block->page)) {

buf_page_make_young(&block->page);
}
@@ -4815,6 +4815,10 @@ buf_page_get_known_nowait(

ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

/* The check for the page was not freed would already have been
performed when the block descriptor was acquired by the thread for the
first time.*/

buf_block_buf_fix_inc(block, file, line);

buf_page_set_accessed(&block->page);
@@ -4861,24 +4865,6 @@ buf_page_get_known_nowait(
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG
if (mode != BUF_KEEP_OLD) {
/* If mode == BUF_KEEP_OLD, we are executing an I/O
completion routine. Avoid a bogus assertion failure
when ibuf_merge_or_delete_for_page() is processing a
page that was just freed due to DROP INDEX, or
deleting a record from SYS_INDEXES. This check will be
skipped in recv_recover_page() as well. */

# ifdef BTR_CUR_HASH_ADAPT
ut_ad(!block->page.file_page_was_freed
|| btr_search_check_marked_free_index(block));
# else /* BTR_CUR_HASH_ADAPT */
ut_ad(!block->page.file_page_was_freed);
# endif /* BTR_CUR_HASH_ADAPT */
}
#endif /* UNIV_DEBUG */

buf_pool->stat.n_page_gets++;

return(TRUE);
@@ -5435,7 +5435,7 @@ dict_set_corrupted(

btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_LE,
BTR_MODIFY_LEAF,
&cursor, 0, __FILE__, __LINE__, &mtr);
&cursor, __FILE__, __LINE__, &mtr);

if (cursor.low_match == dtuple_get_n_fields(tuple)) {
/* UPDATE SYS_INDEXES SET TYPE=index->type
@@ -5538,7 +5538,7 @@ dict_index_set_merge_threshold(

btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_GE,
BTR_MODIFY_LEAF,
&cursor, 0, __FILE__, __LINE__, &mtr);
&cursor, __FILE__, __LINE__, &mtr);

if (cursor.up_match == dtuple_get_n_fields(tuple)
&& rec_get_n_fields_old(btr_cur_get_rec(&cursor))
@@ -3437,7 +3437,7 @@ fts_add_doc_by_id(

btr_pcur_open_with_no_init(
fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
&pcur, 0, &mtr);
&pcur, &mtr);

/* If we have a match, add the data to doc structure */
if (btr_pcur_get_low_match(&pcur) == 1) {
@@ -3475,7 +3475,7 @@ fts_add_doc_by_id(

btr_pcur_open_with_no_init(
clust_index, clust_ref, PAGE_CUR_LE,
BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);
BTR_SEARCH_LEAF, &clust_pcur, &mtr);

doc_pcur = &clust_pcur;
clust_rec = btr_pcur_get_rec(&clust_pcur);

0 comments on commit c0eda62

Please sign in to comment.