Skip to content

Commit

Permalink
MDEV-26055: Improve adaptive flushing
Browse files Browse the repository at this point in the history
Adaptive flushing is enabled by setting innodb_max_dirty_pages_pct_lwm>0
(not default) and innodb_adaptive_flushing=ON (default).
There is also the parameter innodb_adaptive_flushing_lwm
(default: 10 per cent of the log capacity). It should enable some
adaptive flushing even when innodb_max_dirty_pages_pct_lwm=0.
That is not being changed here.

This idea was first presented by Inaam Rana several years ago,
and I discussed it with Jean-François Gagné at FOSDEM 2023.

buf_flush_page_cleaner(): When we are not near the log capacity limit
(neither buf_flush_async_lsn nor buf_flush_sync_lsn are set),
also try to move clean blocks from the buf_pool.LRU list to buf_pool.free
or initiate writes (but not the eviction) of dirty blocks, until
the remaining I/O capacity has been consumed.

buf_flush_LRU_list_batch(): Add the parameter bool evict, to specify
whether dirty least recently used pages (from buf_pool.LRU) should
be evicted immediately after they have been written out. Callers outside
buf_flush_page_cleaner() will pass evict=true, to retain the existing
behaviour.

buf_do_LRU_batch(): Add the parameter bool evict.
Return counts of evicted and flushed pages.

buf_flush_LRU(): Add the parameter bool evict.
Assume that the caller holds buf_pool.mutex and
will invoke buf_dblwr.flush_buffered_writes() afterwards.

buf_flush_list_holding_mutex(): A low-level variant of buf_flush_list()
whose caller must hold buf_pool.mutex and invoke
buf_dblwr.flush_buffered_writes() afterwards.

buf_flush_wait_batch_end_acquiring_mutex(): Remove. It is enough to have
buf_flush_wait_batch_end().

page_cleaner_flush_pages_recommendation(): Avoid some floating-point
arithmetics.

buf_flush_page(), buf_flush_check_neighbor(), buf_flush_check_neighbors(),
buf_flush_try_neighbors(): Rename the parameter "bool lru" to "bool evict".

buf_free_from_unzip_LRU_list_batch(): Remove the parameter.
Only actual page writes will contribute towards the limit.

buf_LRU_free_page(): Evict freed pages of temporary tables.

buf_pool.done_free: Broadcast whenever a block is freed
(and buf_pool.try_LRU_scan is set).

buf_pool_t::io_buf_t::reserve(): Retry indefinitely.
During the test encryption.innochecksum we easily run out of
these buffers for PAGE_COMPRESSED or ENCRYPTED pages.

Tested by Matthias Leich and Axel Schwenke
  • Loading branch information
dr-m committed Mar 16, 2023
1 parent 4105017 commit 9593ccc
Show file tree
Hide file tree
Showing 5 changed files with 360 additions and 354 deletions.
194 changes: 112 additions & 82 deletions storage/innobase/buf/buf0buf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,6 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage,
if (id.space() == SRV_TMP_SPACE_ID
&& innodb_encrypt_temporary_tables) {
slot = buf_pool.io_buf_reserve();
ut_a(slot);
slot->allocate();
bool ok = buf_tmp_page_decrypt(slot->crypt_buf, dst_frame);
slot->release();
Expand All @@ -431,7 +430,6 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage,
}

slot = buf_pool.io_buf_reserve();
ut_a(slot);
slot->allocate();

decompress_with_slot:
Expand All @@ -455,7 +453,6 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage,
}

slot = buf_pool.io_buf_reserve();
ut_a(slot);
slot->allocate();

/* decrypt using crypt_buf to dst_frame */
Expand Down Expand Up @@ -1293,6 +1290,41 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
return(true); /* free_list was enough */
}

void buf_pool_t::io_buf_t::create(ulint n_slots)
{
this->n_slots= n_slots;
slots= static_cast<buf_tmp_buffer_t*>
(ut_malloc_nokey(n_slots * sizeof *slots));
memset((void*) slots, 0, n_slots * sizeof *slots);
}

void buf_pool_t::io_buf_t::close()
{
for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
{
aligned_free(s->crypt_buf);
aligned_free(s->comp_buf);
}
ut_free(slots);
slots= nullptr;
n_slots= 0;
}

buf_tmp_buffer_t *buf_pool_t::io_buf_t::reserve()
{
for (;;)
{
for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
if (s->acquire())
return s;
os_aio_wait_until_no_pending_writes();
for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
if (s->acquire())
return s;
os_aio_wait_until_no_pending_reads();
}
}

/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status
to the specified string. The format and the following parameters are the
same as the ones used for printf(3).
Expand Down Expand Up @@ -1359,21 +1391,23 @@ inline bool buf_pool_t::withdraw_blocks()

block = next_block;
}
mysql_mutex_unlock(&mutex);

/* reserve free_list length */
if (UT_LIST_GET_LEN(withdraw) < withdraw_target) {
buf_flush_LRU(
std::max<ulint>(withdraw_target
- UT_LIST_GET_LEN(withdraw),
srv_LRU_scan_depth));
buf_flush_wait_batch_end_acquiring_mutex(true);
srv_LRU_scan_depth),
true);
mysql_mutex_unlock(&buf_pool.mutex);
buf_dblwr.flush_buffered_writes();
mysql_mutex_lock(&buf_pool.mutex);
buf_flush_wait_batch_end(true);
}

/* relocate blocks/buddies in withdrawn area */
ulint count2 = 0;

mysql_mutex_lock(&mutex);
buf_pool_mutex_exit_forbid();
for (buf_page_t* bpage = UT_LIST_GET_FIRST(LRU), *next_bpage;
bpage; bpage = next_bpage) {
Expand Down Expand Up @@ -2380,27 +2414,22 @@ buf_page_get_low(
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_SX_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad(!allow_ibuf_merge
|| mode == BUF_GET
|| mode == BUF_GET_POSSIBLY_FREED
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);

if (err) {
*err = DB_SUCCESS;
}

#ifdef UNIV_DEBUG
switch (mode) {
case BUF_PEEK_IF_IN_POOL:
default:
ut_ad(!allow_ibuf_merge);
ut_ad(mode == BUF_PEEK_IF_IN_POOL);
break;
case BUF_GET_POSSIBLY_FREED:
case BUF_GET_IF_IN_POOL:
/* The caller may pass a dummy page size,
because it does not really matter. */
break;
default:
MY_ASSERT_UNREACHABLE();
case BUF_GET_POSSIBLY_FREED:
break;
case BUF_GET:
case BUF_GET_IF_IN_POOL_OR_WATCH:
ut_ad(!mtr->is_freeing_tree());
Expand Down Expand Up @@ -2547,11 +2576,12 @@ buf_page_get_low(
return nullptr;
}
} else if (mode != BUF_PEEK_IF_IN_POOL) {
} else if (!mtr) {
} else if (!mtr) {
ut_ad(!block->page.oldest_modification());
mysql_mutex_lock(&buf_pool.mutex);
block->unfix();

free_unfixed_block:
if (!buf_LRU_free_page(&block->page, true)) {
ut_ad(0);
}
Expand Down Expand Up @@ -2667,20 +2697,19 @@ buf_page_get_low(

/* Decompress the page while not holding
buf_pool.mutex. */
auto ok = buf_zip_decompress(block, false);
block->page.read_unfix(state);
state = block->page.state();
block->page.lock.x_unlock();
const auto ok = buf_zip_decompress(block, false);
--buf_pool.n_pend_unzip;

if (!ok) {
/* FIXME: Evict the corrupted
ROW_FORMAT=COMPRESSED page! */

if (err) {
*err = DB_PAGE_CORRUPTED;
}
return nullptr;
mysql_mutex_lock(&buf_pool.mutex);
}
state = block->page.read_unfix(state);
block->page.lock.x_unlock();

if (!ok) {
goto free_unfixed_block;
}
}

Expand Down Expand Up @@ -2886,72 +2915,73 @@ buf_page_get_gen(
dberr_t* err,
bool allow_ibuf_merge)
{
if (buf_block_t *block= recv_sys.recover(page_id))
buf_block_t *block= recv_sys.recover(page_id);
if (UNIV_LIKELY(!block))
return buf_page_get_low(page_id, zip_size, rw_latch,
guess, mode, mtr, err, allow_ibuf_merge);
else if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
{
if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
{
corrupted:
if (err)
*err= DB_CORRUPTION;
return nullptr;
}
/* Recovery is a special case; we fix() before acquiring lock. */
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a lock, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
corrupted:
if (err)
*err= DB_SUCCESS;
const bool must_merge= allow_ibuf_merge &&
ibuf_page_exists(page_id, block->zip_size());
*err= DB_CORRUPTION;
return nullptr;
}
/* Recovery is a special case; we fix() before acquiring lock. */
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a lock, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
if (err)
*err= DB_SUCCESS;
const bool must_merge= allow_ibuf_merge &&
ibuf_page_exists(page_id, block->zip_size());
if (s < buf_page_t::UNFIXED)
{
got_freed_page:
ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
mysql_mutex_lock(&buf_pool.mutex);
block->page.unfix();
buf_LRU_free_page(&block->page, true);
mysql_mutex_unlock(&buf_pool.mutex);
goto corrupted;
}
else if (must_merge &&
fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX &&
page_is_leaf(block->page.frame))
{
block->page.lock.x_lock();
s= block->page.state();
ut_ad(s > buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
if (s < buf_page_t::UNFIXED)
{
got_freed_page:
ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
block->page.unfix();
goto corrupted;
block->page.lock.x_unlock();
goto got_freed_page;
}
else if (must_merge &&
fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX &&
page_is_leaf(block->page.frame))
else
{
block->page.lock.x_lock();
s= block->page.state();
ut_ad(s > buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
if (s < buf_page_t::UNFIXED)
if (block->page.is_ibuf_exist())
block->page.clear_ibuf_exist();
if (dberr_t e=
ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()))
{
block->page.lock.x_unlock();
goto got_freed_page;
}
else
{
if (block->page.is_ibuf_exist())
block->page.clear_ibuf_exist();
if (dberr_t e=
ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()))
{
if (err)
*err= e;
buf_pool.corrupted_evict(&block->page, s);
return nullptr;
}
if (err)
*err= e;
buf_pool.corrupted_evict(&block->page, s);
return nullptr;
}
}

if (rw_latch == RW_X_LATCH)
{
mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
return block;
}
block->page.lock.x_unlock();
if (rw_latch == RW_X_LATCH)
{
mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
return block;
}
mtr->page_lock(block, rw_latch);
return block;
block->page.lock.x_unlock();
}

return buf_page_get_low(page_id, zip_size, rw_latch,
guess, mode, mtr, err, allow_ibuf_merge);
mtr->page_lock(block, rw_latch);
return block;
}

/********************************************************************//**
Expand Down
Loading

0 comments on commit 9593ccc

Please sign in to comment.