Skip to content

Commit

Permalink
MDEV-32068 Some calls to buf_read_ahead_linear() seem to be useless
Browse files Browse the repository at this point in the history
The linear read-ahead (enabled by nonzero innodb_read_ahead_threshold)
works best if index leaf pages or undo log pages have been allocated
on adjacent page numbers. The read-ahead is assumed not to be helpful
in other types of page accesses, such as non-leaf index pages.

buf_page_get_low(): Do not invoke buf_page_t::set_accessed(),
buf_page_make_young_if_needed(), or buf_read_ahead_linear().
We will invoke them in those callers of buf_page_get_gen() or
buf_page_get() where it makes sense: the access is not
one-time-on-startup and the page is not going to be freed soon.

btr_copy_blob_prefix(), btr_pcur_move_to_next_page(),
trx_undo_get_prev_rec_from_prev_page(),
trx_undo_get_first_rec(), btr_cur_t::search_leaf(),
btr_cur_t::open_leaf(): Invoke buf_read_ahead_linear().

We will not invoke linear read-ahead in functions that would
essentially allocate or free pages, because pages that are
freshly allocated are expected to be initialized by buf_page_create()
and not read from the data file. Likewise, freeing pages should
not involve accessing any sibling pages, except for freeing
singly-linked lists of BLOB pages.

We will not invoke read-ahead in btr_cur_t::pessimistic_search_leaf()
or in a pessimistic operation of btr_cur_t::open_leaf(), because
it is assumed that pessimistic operations should be preceded by
optimistic operations, which should already have invoked read-ahead.

buf_page_make_young_if_needed(): Also invoke buf_page_t::set_accessed()
and return its result.

btr_cur_nonleaf_make_young(): Like buf_page_make_young_if_needed(),
but do not invoke buf_page_t::set_accessed().

Reviewed by: Vladislav Lesin
Tested by: Matthias Leich
  • Loading branch information
dr-m committed Dec 5, 2023
1 parent 768a736 commit f074223
Show file tree
Hide file tree
Showing 20 changed files with 146 additions and 66 deletions.
11 changes: 10 additions & 1 deletion storage/innobase/btr/btr0btr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,11 @@ ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index)
@param[in] merge whether change buffer merge should be attempted
@param[in,out] mtr mini-transaction
@param[out] err error code
@param[out] first set if this is a first-time access to the page
@return block */
buf_block_t *btr_block_get(const dict_index_t &index,
uint32_t page, rw_lock_type_t mode, bool merge,
mtr_t *mtr, dberr_t *err)
mtr_t *mtr, dberr_t *err, bool *first)
{
ut_ad(mode != RW_NO_LATCH);
dberr_t local_err;
Expand All @@ -242,6 +243,8 @@ buf_block_t *btr_block_get(const dict_index_t &index,
*err= DB_PAGE_CORRUPTED;
block= nullptr;
}
else if (!buf_page_make_young_if_needed(&block->page) && first)
*first= true;
}
else if (*err == DB_DECRYPTION_FAILED)
btr_decryption_failed(index);
Expand Down Expand Up @@ -302,6 +305,8 @@ btr_root_block_get(
*err= DB_CORRUPTION;
block= nullptr;
}
else
buf_page_make_young_if_needed(&block->page);
}
else if (*err == DB_DECRYPTION_FAILED)
btr_decryption_failed(*index);
Expand Down Expand Up @@ -553,8 +558,11 @@ btr_page_alloc_for_ibuf(
root->page.frame)),
0, RW_X_LATCH, nullptr, BUF_GET, mtr, err);
if (new_block)
{
buf_page_make_young_if_needed(&new_block->page);
*err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block,
PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
}
ut_d(if (*err == DB_SUCCESS)
flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
return new_block;
Expand Down Expand Up @@ -1352,6 +1360,7 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset)
if (buf_block_t *root= buf_page_get(page_id_t(space->id, index->page),
space->zip_size(), RW_SX_LATCH, &mtr))
{
buf_page_make_young_if_needed(&root->page);
mtr.set_named_space(space);
page_set_autoinc(root, autoinc, &mtr, reset);
}
Expand Down
51 changes: 33 additions & 18 deletions storage/innobase/btr/btr0cur.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1263,7 +1263,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,

page_cur.block= block;
ut_ad(block == mtr->at_savepoint(block_savepoint));
ut_ad(rw_latch != RW_NO_LATCH);
const bool not_first_access{buf_page_make_young_if_needed(&block->page)};
#ifdef UNIV_ZIP_DEBUG
if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block))
ut_a(page_zip_validate(page_zip, block->page.frame, index()));
Expand Down Expand Up @@ -1542,6 +1542,9 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
case BTR_SEARCH_PREV: /* btr_pcur_move_to_prev() */
ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);

if (!not_first_access)
buf_read_ahead_linear(page_id, zip_size, false);

if (page_has_prev(block->page.frame) &&
page_rec_is_first(page_cur.rec, block->page.frame))
{
Expand Down Expand Up @@ -1581,6 +1584,8 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
buf_mode= btr_op == BTR_DELETE_OP
? BUF_GET_IF_IN_POOL_OR_WATCH
: BUF_GET_IF_IN_POOL;
else if (!not_first_access)
buf_read_ahead_linear(page_id, zip_size, false);
break;
case BTR_MODIFY_TREE:
ut_ad(rw_latch == RW_X_LATCH);
Expand Down Expand Up @@ -1614,6 +1619,14 @@ ATTRIBUTE_COLD void mtr_t::index_lock_upgrade()
slot.type= MTR_MEMO_X_LOCK;
}

/** Make a non-leaf page young if buf_page_peek_if_too_old() reports
that it is too old. This deliberately does not invoke
buf_page_t::set_accessed(), because we do not want linear read-ahead
to be triggered by accesses to non-leaf pages.
@param bpage  buffer pool page descriptor of the non-leaf page */
static void btr_cur_nonleaf_make_young(buf_page_t *bpage)
{
  /* Common case: the page is not too old, so there is nothing to do. */
  if (!UNIV_UNLIKELY(buf_page_peek_if_too_old(bpage)))
    return;

  buf_page_make_young(bpage);
}

ATTRIBUTE_COLD
dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
page_cur_mode_t mode, mtr_t *mtr)
Expand Down Expand Up @@ -1716,6 +1729,8 @@ dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
if (height != btr_page_get_level(block->page.frame))
goto corrupted;

btr_cur_nonleaf_make_young(&block->page);

#ifdef UNIV_ZIP_DEBUG
const page_zip_des_t *page_zip= buf_block_get_page_zip(block);
ut_a(!page_zip || page_zip_validate(page_zip, block->page.frame, index()));
Expand Down Expand Up @@ -1802,6 +1817,8 @@ dberr_t btr_cur_search_to_nth_level(ulint level,
btr_decryption_failed(*index);
goto func_exit;
}
else
btr_cur_nonleaf_make_young(&block->page);

#ifdef UNIV_ZIP_DEBUG
if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block))
Expand Down Expand Up @@ -1937,18 +1954,15 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
ut_ad(n_blocks < BTR_MAX_LEVELS);
ut_ad(savepoint + n_blocks == mtr->get_savepoint());

bool first_access= false;
buf_block_t* block=
btr_block_get(*index, page,
height ? upper_rw_latch : root_leaf_rw_latch,
!height, mtr, &err);
!height, mtr, &err, &first_access);
ut_ad(!block == (err != DB_SUCCESS));

if (!block)
{
if (err == DB_DECRYPTION_FAILED)
btr_decryption_failed(*index);
break;
}

if (first)
page_cur_set_before_first(block, &page_cur);
Expand Down Expand Up @@ -2032,10 +2046,16 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,

offsets= rec_get_offsets(page_cur.rec, index, offsets, 0, ULINT_UNDEFINED,
&heap);
page= btr_node_ptr_get_child_page_no(page_cur.rec, offsets);

ut_ad(latch_mode != BTR_MODIFY_TREE || upper_rw_latch == RW_X_LATCH);

if (latch_mode != BTR_MODIFY_TREE);
if (latch_mode != BTR_MODIFY_TREE)
{
if (!height && first && first_access)
buf_read_ahead_linear(page_id_t(block->page.id().space(), page),
block->page.zip_size(), false);
}
else if (btr_cur_need_opposite_intention(block->page, index->is_clust(),
lock_intention,
node_ptr_max_size, compress_limit,
Expand Down Expand Up @@ -2073,7 +2093,6 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
}

/* Go to the child node */
page= btr_node_ptr_get_child_page_no(page_cur.rec, offsets);
n_blocks++;
}

Expand Down Expand Up @@ -3840,22 +3859,14 @@ btr_cur_pess_upd_restore_supremum(

const page_id_t block_id{block->page.id()};
const page_id_t prev_id(block_id.space(), prev_page_no);
dberr_t err;
buf_block_t* prev_block
= buf_page_get_gen(prev_id, 0, RW_NO_LATCH, nullptr,
BUF_PEEK_IF_IN_POOL, mtr, &err);
/* Since we already held an x-latch on prev_block, it must
be available and not be corrupted unless the buffer pool got
corrupted somehow. */
= mtr->get_already_latched(prev_id, MTR_MEMO_PAGE_X_FIX);
if (UNIV_UNLIKELY(!prev_block)) {
return err;
return DB_CORRUPTION;
}
ut_ad(!memcmp_aligned<4>(prev_block->page.frame + FIL_PAGE_NEXT,
block->page.frame + FIL_PAGE_OFFSET, 4));

/* We must already have an x-latch on prev_block! */
ut_ad(mtr->memo_contains_flagged(prev_block, MTR_MEMO_PAGE_X_FIX));

lock_rec_reset_and_inherit_gap_locks(*prev_block, block_id,
PAGE_HEAP_NO_SUPREMUM,
page_rec_get_heap_no(rec));
Expand Down Expand Up @@ -6664,6 +6675,10 @@ btr_copy_blob_prefix(
mtr.commit();
return copied_len;
}
if (!buf_page_make_young_if_needed(&block->page)) {
buf_read_ahead_linear(id, 0, false);
}

page = buf_block_get_frame(block);

blob_header = page + offset;
Expand Down
17 changes: 13 additions & 4 deletions storage/innobase/btr/btr0pcur.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ Created 2/23/1996 Heikki Tuuri
*******************************************************/

#include "btr0pcur.h"
#include "ut0byte.h"
#include "buf0rea.h"
#include "rem0cmp.h"
#include "trx0trx.h"
#include "ibuf0ibuf.h"

/**************************************************************//**
Resets a persistent cursor object, freeing ::old_rec_buf if it is
Expand Down Expand Up @@ -261,13 +262,15 @@ static bool btr_pcur_optimistic_latch_leaves(buf_block_t *block,
buf_page_get_gen(page_id_t(id.space(), left_page_no), zip_size,
mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr);

if (left_block &&
btr_page_get_next(left_block->page.frame) != id.page_no())
if (!left_block);
else if (btr_page_get_next(left_block->page.frame) != id.page_no())
{
release_left_block:
mtr->release_last_page();
return false;
}
else
buf_page_make_young_if_needed(&left_block->page);
}

if (buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr))
Expand Down Expand Up @@ -539,10 +542,11 @@ btr_pcur_move_to_next_page(
}

dberr_t err;
bool first_access = false;
buf_block_t* next_block = btr_block_get(
*cursor->index(), next_page_no,
rw_lock_type_t(cursor->latch_mode & (RW_X_LATCH | RW_S_LATCH)),
page_is_leaf(page), mtr, &err);
page_is_leaf(page), mtr, &err, &first_access);

if (UNIV_UNLIKELY(!next_block)) {
return err;
Expand All @@ -561,6 +565,11 @@ btr_pcur_move_to_next_page(

const auto s = mtr->get_savepoint();
mtr->rollback_to_savepoint(s - 2, s - 1);
if (first_access) {
buf_read_ahead_linear(next_block->page.id(),
next_block->zip_size(),
ibuf_inside(mtr));
}
return DB_SUCCESS;
}

Expand Down
1 change: 0 additions & 1 deletion storage/innobase/btr/btr0sea.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1141,7 +1141,6 @@ btr_search_guess_on_hash(
}

block->page.fix();
block->page.set_accessed();
buf_page_make_young_if_needed(&block->page);
static_assert(ulint{MTR_MEMO_PAGE_S_FIX} == ulint{BTR_SEARCH_LEAF},
"");
Expand Down
14 changes: 0 additions & 14 deletions storage/innobase/buf/buf0buf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2268,7 +2268,6 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
}

bpage->set_accessed();
buf_page_make_young_if_needed(bpage);

#ifdef UNIV_DEBUG
Expand Down Expand Up @@ -2895,18 +2894,6 @@ buf_page_get_low(
ut_ad(page_id_t(page_get_space_id(block->page.frame),
page_get_page_no(block->page.frame))
== page_id);

if (mode == BUF_GET_POSSIBLY_FREED
|| mode == BUF_PEEK_IF_IN_POOL) {
return block;
}

const bool not_first_access{block->page.set_accessed()};
buf_page_make_young_if_needed(&block->page);
if (!not_first_access) {
buf_read_ahead_linear(page_id, block->zip_size(),
ibuf_inside(mtr));
}
}

return block;
Expand Down Expand Up @@ -3079,7 +3066,6 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,

block->page.fix();
ut_ad(!block->page.is_read_fixed());
block->page.set_accessed();
buf_page_make_young_if_needed(&block->page);
mtr->memo_push(block, mtr_memo_type_t(rw_latch));
}
Expand Down
8 changes: 8 additions & 0 deletions storage/innobase/buf/buf0lru.cc
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,14 @@ void buf_page_make_young(buf_page_t *bpage)
mysql_mutex_unlock(&buf_pool.mutex);
}

/** Flag a page as accessed, and make it young if
buf_page_peek_if_too_old() reports that it is too old.
@param bpage  buffer pool page descriptor
@return the result of buf_page_t::set_accessed()
@retval false if this was the first access to the page */
bool buf_page_make_young_if_needed(buf_page_t *bpage)
{
  /* The access flag must be updated before the age check below. */
  const bool accessed_before= bpage->set_accessed();

  if (UNIV_UNLIKELY(buf_page_peek_if_too_old(bpage)))
  {
    buf_page_make_young(bpage);
  }

  return accessed_before;
}

/** Try to free a block. If bpage is a descriptor of a compressed-only
ROW_FORMAT=COMPRESSED page, the buf_page_t object will be freed as well.
The caller must hold buf_pool.mutex.
Expand Down
5 changes: 4 additions & 1 deletion storage/innobase/dict/dict0boot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ static constexpr page_id_t hdr_page_id{DICT_HDR_SPACE, DICT_HDR_PAGE_NO};
/** Acquire the data dictionary header page, requesting RW_X_LATCH,
and register the access with buf_page_make_young_if_needed().
@param mtr  mini-transaction
@return the block returned by buf_page_get_gen() for hdr_page_id
@retval nullptr if buf_page_get_gen() did not return a block */
static buf_block_t *dict_hdr_get(mtr_t *mtr)
{
  /* We assume that the DICT_HDR page is always readable and available. */
  buf_block_t *b=
    buf_page_get_gen(hdr_page_id, 0, RW_X_LATCH, nullptr, BUF_GET, mtr);
  /* Do not dereference the block if the assumption above fails;
  the caller will then receive nullptr. */
  if (b)
    buf_page_make_young_if_needed(&b->page);
  return b;
}

/**********************************************************************//**
Expand Down
6 changes: 6 additions & 0 deletions storage/innobase/gis/gis0sea.cc
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,8 @@ rtr_pcur_getnext_from_path(
break;
}

buf_page_make_young_if_needed(&block->page);

page = buf_block_get_frame(block);
page_ssn = page_get_ssn_id(page);

Expand Down Expand Up @@ -683,6 +685,8 @@ dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple,
return err;
}

buf_page_make_young_if_needed(&block->page);

const page_t *page= buf_block_get_frame(block);
#ifdef UNIV_ZIP_DEBUG
if (rw_latch != RW_NO_LATCH) {
Expand Down Expand Up @@ -1703,6 +1707,8 @@ rtr_cur_restore_position(
goto func_exit;
}

buf_page_make_young_if_needed(&page_cursor->block->page);

/* Get the page SSN */
page = buf_block_get_frame(page_cursor->block);
page_ssn = page_get_ssn_id(page);
Expand Down
14 changes: 12 additions & 2 deletions storage/innobase/ibuf/ibuf0ibuf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,13 @@ ibuf_header_page_get(
buf_block_t* block = buf_page_get(
page_id_t(IBUF_SPACE_ID, FSP_IBUF_HEADER_PAGE_NO),
0, RW_X_LATCH, mtr);
if (UNIV_UNLIKELY(!block)) {
return nullptr;
}

buf_page_make_young_if_needed(&block->page);

return block ? block->page.frame : nullptr;
return block->page.frame;
}

/** Acquire the change buffer root page.
Expand All @@ -326,7 +331,12 @@ static buf_block_t *ibuf_tree_root_get(mtr_t *mtr, dberr_t *err= nullptr)
buf_block_t *block=
buf_page_get_gen(page_id_t{IBUF_SPACE_ID, FSP_IBUF_TREE_ROOT_PAGE_NO},
0, RW_SX_LATCH, nullptr, BUF_GET, mtr, err);
ut_ad(!block || ibuf.empty == page_is_empty(block->page.frame));
if (block)
{
ut_ad(ibuf.empty == page_is_empty(block->page.frame));
buf_page_make_young_if_needed(&block->page);
}

return block;
}

Expand Down
4 changes: 3 additions & 1 deletion storage/innobase/include/btr0btr.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,12 @@ ATTRIBUTE_COLD void btr_decryption_failed(const dict_index_t &index);
@param[in] merge whether change buffer merge should be attempted
@param[in,out] mtr mini-transaction
@param[out] err error code
@param[out] first set if this is a first-time access to the page
@return block */
buf_block_t *btr_block_get(const dict_index_t &index,
uint32_t page, rw_lock_type_t mode, bool merge,
mtr_t *mtr, dberr_t *err= nullptr);
mtr_t *mtr, dberr_t *err= nullptr,
bool *first= nullptr);

/**************************************************************//**
Gets the index id field of a page.
Expand Down
Loading

0 comments on commit f074223

Please sign in to comment.