MDEV-26055: Improve adaptive flushing
This is a 10.5 backport from 10.6
commit 9593ccc.

Adaptive flushing is enabled by setting innodb_max_dirty_pages_pct_lwm>0
(not default) and innodb_adaptive_flushing=ON (default).
There is also the parameter innodb_adaptive_flushing_lwm
(default: 10 per cent of the log capacity). It should enable some
adaptive flushing even when innodb_max_dirty_pages_pct_lwm=0.
That is not being changed here.
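
For reference, a minimal configuration sketch that enables the code path described above (the values are only illustrative; the parameter names are the ones discussed in this message):

  [mysqld]
  innodb_adaptive_flushing = ON          # default
  innodb_adaptive_flushing_lwm = 10      # default: 10 per cent of the log capacity
  innodb_max_dirty_pages_pct_lwm = 10    # default is 0; a value > 0 enables this heuristic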

This idea was first presented by Inaam Rana several years ago,
and I discussed it with Jean-François Gagné at FOSDEM 2023.

buf_flush_page_cleaner(): When we are not near the log capacity limit
(neither buf_flush_async_lsn nor buf_flush_sync_lsn is set),
also try to move clean blocks from the buf_pool.LRU list to buf_pool.free
or initiate writes (but not the eviction) of dirty blocks, until
the remaining I/O capacity has been consumed.
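
A simplified, self-contained model of that branch of the page cleaner loop (the stand-in type and function names below are illustrative, not the real InnoDB ones):

  #include <cstddef>

  struct model_buf_pool
  {
    size_t async_lsn= 0, sync_lsn= 0;       // stand-ins for buf_flush_async_lsn, buf_flush_sync_lsn
    void evict_clean_tail() {}              // stand-in: clean buf_pool.LRU tail blocks -> buf_pool.free
    void write_dirty_tail(size_t, bool) {}  // stand-in: initiate writes of dirty LRU tail blocks
  };

  // One background iteration: run only when not near the log capacity limit,
  // and spend only whatever innodb_io_capacity budget is still unused.
  void page_cleaner_background_step(model_buf_pool &pool, size_t io_capacity,
                                    size_t already_flushed)
  {
    if (pool.async_lsn || pool.sync_lsn)
      return;                               // close to the limit: leave it to the main path
    pool.evict_clean_tail();
    if (already_flushed < io_capacity)
      pool.write_dirty_tail(io_capacity - already_flushed, /*evict=*/false);
  }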

buf_flush_LRU_list_batch(): Add the parameter bool evict, to specify
whether dirty least recently used pages (from buf_pool.LRU) should
be evicted immediately after they have been written out. Callers outside
buf_flush_page_cleaner() will pass evict=true, to retain the existing
behaviour.
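
The effect of the new parameter on a single block at the tail of buf_pool.LRU can be sketched as follows (a hypothetical helper, not the actual function body):

  enum class lru_page_action { skip, evict_now, write_and_evict, write_keep_clean };

  lru_page_action lru_batch_action(bool is_dirty, bool write_possible, bool evict)
  {
    if (!is_dirty)
      return lru_page_action::evict_now;            // clean block: move to buf_pool.free
    if (!write_possible)
      return lru_page_action::skip;                 // e.g. a write is already in progress
    return evict ? lru_page_action::write_and_evict      // evict=true: existing behaviour
                 : lru_page_action::write_keep_clean;    // evict=false: page cleaner case
  }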

buf_do_LRU_batch(): Add the parameter bool evict.
Return counts of evicted and flushed pages.
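
The two counts could be reported through an aggregate of this shape (the type and field names are assumptions for illustration):

  struct lru_batch_counts
  {
    ulint evicted;   // blocks moved from buf_pool.LRU to buf_pool.free
    ulint flushed;   // dirty blocks whose writes were initiated
  };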

buf_flush_LRU(): Add the parameter bool evict.
Assume that the caller holds buf_pool.mutex and
will invoke buf_dblwr.flush_buffered_writes() afterwards.
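
A hypothetical caller outside the page cleaner would therefore look roughly like this (the withdraw_blocks() hunk further down in the diff follows the same pattern):

  ulint flush_lru_example(ulint max_n)
  {
    mysql_mutex_lock(&buf_pool.mutex);
    const ulint n_flushed= buf_flush_LRU(max_n, /*evict=*/true);
    mysql_mutex_unlock(&buf_pool.mutex);
    buf_dblwr.flush_buffered_writes();   // now the caller's responsibility
    return n_flushed;
  }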

buf_flush_list_holding_mutex(): A low-level variant of buf_flush_list()
whose caller must hold buf_pool.mutex and invoke
buf_dblwr.flush_buffered_writes() afterwards.
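
One plausible shape for the relationship between the two functions, assuming both take a page-count limit and a target LSN (a sketch, not the actual definition):

  ulint buf_flush_list_sketch(ulint max_n, lsn_t lsn)
  {
    mysql_mutex_lock(&buf_pool.mutex);
    const ulint n= buf_flush_list_holding_mutex(max_n, lsn);
    mysql_mutex_unlock(&buf_pool.mutex);
    buf_dblwr.flush_buffered_writes();   // done here, not in the low-level variant
    return n;
  }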

buf_flush_wait_batch_end_acquiring_mutex(): Remove. It is enough to have
buf_flush_wait_batch_end().

page_cleaner_flush_pages_recommendation(): Avoid some floating-point
arithmetic.
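
As an illustration of that kind of rewrite (the real function computes a more involved heuristic), a proportional flushing target can be scaled with 64-bit integer arithmetic instead of taking a detour through double:

  #include <cstdint>

  // target = io_capacity * age / log_capacity, without floating point;
  // conceptually replaces uint64_t(double(age) / double(log_capacity) * io_capacity)
  uint64_t scaled_flush_target(uint64_t age, uint64_t log_capacity,
                               uint64_t io_capacity)
  {
    return log_capacity ? io_capacity * age / log_capacity : io_capacity;
  }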

buf_flush_page(), buf_flush_check_neighbor(), buf_flush_check_neighbors(),
buf_flush_try_neighbors(): Rename the parameter "bool lru" to "bool evict".

buf_free_from_unzip_LRU_list_batch(): Remove the parameter.
Only actual page writes will contribute towards the limit.

buf_LRU_free_page(): Evict freed pages of temporary tables.

buf_pool.done_free: Broadcast whenever a block is freed
(and buf_pool.try_LRU_scan is set).
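
The signalling pattern, modelled here with plain pthread primitives (the real buf_pool uses its own mutex and member types; this is only a self-contained illustration):

  #include <pthread.h>
  #include <cstddef>

  struct free_list_model
  {
    pthread_mutex_t mutex= PTHREAD_MUTEX_INITIALIZER;
    pthread_cond_t done_free= PTHREAD_COND_INITIALIZER;
    size_t n_free= 0;
    bool try_LRU_scan= false;

    void block_freed()                       // whoever frees a block broadcasts
    {
      pthread_mutex_lock(&mutex);
      n_free++;
      try_LRU_scan= true;                    // lets stalled LRU scans retry
      pthread_cond_broadcast(&done_free);
      pthread_mutex_unlock(&mutex);
    }

    void wait_for_free_block()               // a waiter blocked on an empty free list
    {
      pthread_mutex_lock(&mutex);
      while (!n_free)
        pthread_cond_wait(&done_free, &mutex);
      n_free--;
      pthread_mutex_unlock(&mutex);
    }
  };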

buf_pool_t::io_buf_t::reserve(): Retry indefinitely.
During the test encryption.innochecksum we easily run out of
these buffers for PAGE_COMPRESSED or ENCRYPTED pages.
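
Its use keeps the acquire/allocate pattern already visible in buf_page_decrypt_after_read() in the diff below; roughly (release() is assumed to be the existing counterpart of acquire()):

  buf_tmp_buffer_t *slot= buf_pool.io_buf_reserve();  // now loops until a slot becomes available
  slot->allocate();
  /* ... decrypt or decompress using the slot's temporary buffers ... */
  slot->release();                                    // hand the slot back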

Tested by Matthias Leich and Axel Schwenke
dr-m committed Nov 16, 2023
1 parent 8b509a5 commit a3d0d5f
Showing 6 changed files with 311 additions and 299 deletions.
storage/innobase/buf/buf0buf.cc: 78 changes (55 additions, 23 deletions)
@@ -410,9 +410,11 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage,
return (true);
}

buf_tmp_buffer_t* slot;

if (node.space->purpose == FIL_TYPE_TEMPORARY
&& innodb_encrypt_temporary_tables) {
buf_tmp_buffer_t* slot = buf_pool.io_buf_reserve();
slot = buf_pool.io_buf_reserve();
ut_a(slot);
slot->allocate();

@@ -431,7 +433,6 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage,
tablespace and page contains used key_version. This is true
also for pages first compressed and then encrypted. */

buf_tmp_buffer_t* slot;
uint key_version = buf_page_get_key_version(dst_frame, flags);

if (page_compressed && !key_version) {
@@ -444,7 +445,6 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage,
}

slot = buf_pool.io_buf_reserve();
ut_a(slot);
slot->allocate();

decompress_with_slot:
@@ -472,7 +472,6 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage,
}

slot = buf_pool.io_buf_reserve();
ut_a(slot);
slot->allocate();
ut_d(fil_page_type_validate(node.space, dst_frame));

@@ -1494,6 +1493,41 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
return(true); /* free_list was enough */
}

void buf_pool_t::io_buf_t::create(ulint n_slots)
{
  this->n_slots= n_slots;
  slots= static_cast<buf_tmp_buffer_t*>
    (ut_malloc_nokey(n_slots * sizeof *slots));
  memset((void*) slots, 0, n_slots * sizeof *slots);
}

void buf_pool_t::io_buf_t::close()
{
  for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
  {
    aligned_free(s->crypt_buf);
    aligned_free(s->comp_buf);
  }
  ut_free(slots);
  slots= nullptr;
  n_slots= 0;
}

buf_tmp_buffer_t *buf_pool_t::io_buf_t::reserve()
{
  for (;;)
  {
    for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
      if (s->acquire())
        return s;
    os_aio_wait_until_no_pending_writes();
    for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
      if (s->acquire())
        return s;
    os_aio_wait_until_no_pending_reads();
  }
}

/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status
to the specified string. The format and the following parameters are the
same as the ones used for printf(3).
@@ -1564,15 +1598,18 @@ inline bool buf_pool_t::withdraw_blocks()

block = next_block;
}
mysql_mutex_unlock(&mutex);

/* reserve free_list length */
if (UT_LIST_GET_LEN(withdraw) < withdraw_target) {
ulint n_flushed = buf_flush_LRU(
std::max<ulint>(withdraw_target
- UT_LIST_GET_LEN(withdraw),
srv_LRU_scan_depth));
buf_flush_wait_batch_end_acquiring_mutex(true);
srv_LRU_scan_depth),
true);
mysql_mutex_unlock(&buf_pool.mutex);
buf_dblwr.flush_buffered_writes();
mysql_mutex_lock(&buf_pool.mutex);
buf_flush_wait_batch_end(true);

if (n_flushed) {
MONITOR_INC_VALUE_CUMULATIVE(
@@ -1586,12 +1623,12 @@ inline bool buf_pool_t::withdraw_blocks()
/* relocate blocks/buddies in withdrawn area */
ulint count2 = 0;

mysql_mutex_lock(&mutex);
buf_page_t* bpage;
bpage = UT_LIST_GET_FIRST(LRU);
while (bpage != NULL) {
buf_page_t* next_bpage = UT_LIST_GET_NEXT(LRU, bpage);
if (bpage->zip.data != NULL
buf_pool_mutex_exit_forbid();
for (buf_page_t* bpage = UT_LIST_GET_FIRST(LRU), *next_bpage;
bpage; bpage = next_bpage) {
ut_ad(bpage->in_file());
next_bpage = UT_LIST_GET_NEXT(LRU, bpage);
if (UNIV_LIKELY_NULL(bpage->zip.data)
&& will_be_withdrawn(bpage->zip.data)
&& bpage->can_relocate()) {
buf_pool_mutex_exit_forbid();
@@ -2667,11 +2704,6 @@ buf_page_get_low(
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_SX_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad(!allow_ibuf_merge
|| mode == BUF_GET
|| mode == BUF_GET_POSSIBLY_FREED
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);

if (err) {
*err = DB_SUCCESS;
@@ -2685,15 +2717,15 @@ buf_page_get_low(
replace any old pages, which were not evicted during DISCARD.
Skip the assertion on space_page_size. */
break;
case BUF_PEEK_IF_IN_POOL:
default:
ut_ad(!allow_ibuf_merge);
ut_ad(mode == BUF_PEEK_IF_IN_POOL);
break;
case BUF_GET_POSSIBLY_FREED:
case BUF_GET_IF_IN_POOL:
/* The caller may pass a dummy page size,
because it does not really matter. */
break;
default:
ut_error;
case BUF_GET_POSSIBLY_FREED:
break;
case BUF_GET_NO_LATCH:
ut_ad(rw_latch == RW_NO_LATCH);
/* fall through */