Skip to content

Commit d34479d

Browse files
committed
MDEV-33053 InnoDB LRU flushing does not run before running out of buffer pool
buf_flush_LRU(): Display a warning if no pages could be evicted and no writes were initiated.

buf_pool_t::need_LRU_eviction(): Renamed from buf_pool_t::ran_out(). Check if the number of free pages is smaller than half of innodb_lru_scan_depth instead of checking if it is 0.

buf_flush_page_cleaner(): For the final LRU flush after a checkpoint flush, use a "budget" of innodb_io_capacity_max, like we do in the case when we are not in "furious" checkpoint flushing.

Co-developed by: Debarun Banerjee
Reviewed by: Debarun Banerjee
Tested by: Matthias Leich
1 parent 16f2f8e commit d34479d

File tree

3 files changed

+82
-29
lines changed

3 files changed

+82
-29
lines changed

storage/innobase/buf/buf0flu.cc

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,6 +1797,28 @@ ulint buf_flush_LRU(ulint max_n, bool evict)
17971797
buf_pool.try_LRU_scan= true;
17981798
pthread_cond_broadcast(&buf_pool.done_free);
17991799
}
1800+
else if (!pages && !buf_pool.try_LRU_scan &&
1801+
buf_pool.LRU_warned.test_and_set(std::memory_order_acquire))
1802+
{
1803+
/* For example, with the minimum innodb_buffer_pool_size=5M and
1804+
the default innodb_page_size=16k there are only a little over 316
1805+
pages in the buffer pool. The buffer pool can easily be exhausted
1806+
by a workload of some dozen concurrent connections. The system could
1807+
reach a deadlock like the following:
1808+
1809+
(1) Many threads are waiting in buf_LRU_get_free_block()
1810+
for buf_pool.done_free.
1811+
(2) Some threads are waiting for a page latch which is held by
1812+
another thread that is waiting in buf_LRU_get_free_block().
1813+
(3) This thread is the only one that could make progress, but
1814+
we fail to do so because all the pages that we scanned are
1815+
buffer-fixed or latched by some thread. */
1816+
sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!"
1817+
" %zu blocks are in use and %zu free."
1818+
" Consider increasing innodb_buffer_pool_size.",
1819+
UT_LIST_GET_LEN(buf_pool.LRU),
1820+
UT_LIST_GET_LEN(buf_pool.free));
1821+
}
18001822

18011823
return pages;
18021824
}
@@ -2287,6 +2309,16 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
22872309
goto func_exit;
22882310
}
22892311

2312+
TPOOL_SUPPRESS_TSAN
2313+
bool buf_pool_t::need_LRU_eviction() const
2314+
{
2315+
/* try_LRU_scan==false means that buf_LRU_get_free_block() is waiting
2316+
for buf_flush_page_cleaner() to evict some blocks */
2317+
return UNIV_UNLIKELY(!try_LRU_scan ||
2318+
(UT_LIST_GET_LEN(LRU) > BUF_LRU_MIN_LEN &&
2319+
UT_LIST_GET_LEN(free) < srv_LRU_scan_depth / 2));
2320+
}
2321+
22902322
/** page_cleaner thread tasked with flushing dirty pages from the buffer
22912323
pools. As of now we'll have only one coordinator. */
22922324
static void buf_flush_page_cleaner()
@@ -2319,21 +2351,24 @@ static void buf_flush_page_cleaner()
23192351
}
23202352

23212353
mysql_mutex_lock(&buf_pool.flush_list_mutex);
2322-
if (buf_pool.ran_out())
2323-
goto no_wait;
2324-
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
2325-
break;
2354+
if (!buf_pool.need_LRU_eviction())
2355+
{
2356+
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
2357+
break;
23262358

2327-
if (buf_pool.page_cleaner_idle() &&
2328-
(!UT_LIST_GET_LEN(buf_pool.flush_list) ||
2329-
srv_max_dirty_pages_pct_lwm == 0.0))
2330-
/* We are idle; wait for buf_pool.page_cleaner_wakeup() */
2331-
my_cond_wait(&buf_pool.do_flush_list,
2332-
&buf_pool.flush_list_mutex.m_mutex);
2333-
else
2334-
my_cond_timedwait(&buf_pool.do_flush_list,
2335-
&buf_pool.flush_list_mutex.m_mutex, &abstime);
2336-
no_wait:
2359+
if (buf_pool.page_cleaner_idle() &&
2360+
(!UT_LIST_GET_LEN(buf_pool.flush_list) ||
2361+
srv_max_dirty_pages_pct_lwm == 0.0))
2362+
{
2363+
buf_pool.LRU_warned.clear(std::memory_order_release);
2364+
/* We are idle; wait for buf_pool.page_cleaner_wakeup() */
2365+
my_cond_wait(&buf_pool.do_flush_list,
2366+
&buf_pool.flush_list_mutex.m_mutex);
2367+
}
2368+
else
2369+
my_cond_timedwait(&buf_pool.do_flush_list,
2370+
&buf_pool.flush_list_mutex.m_mutex, &abstime);
2371+
}
23372372
set_timespec(abstime, 1);
23382373

23392374
lsn_limit= buf_flush_sync_lsn;
@@ -2365,7 +2400,7 @@ static void buf_flush_page_cleaner()
23652400
}
23662401
while (false);
23672402

2368-
if (!buf_pool.ran_out())
2403+
if (!buf_pool.need_LRU_eviction())
23692404
continue;
23702405
mysql_mutex_lock(&buf_pool.flush_list_mutex);
23712406
oldest_lsn= buf_pool.get_oldest_modification(0);
@@ -2394,7 +2429,7 @@ static void buf_flush_page_cleaner()
23942429
if (oldest_lsn >= soft_lsn_limit)
23952430
buf_flush_async_lsn= soft_lsn_limit= 0;
23962431
}
2397-
else if (buf_pool.ran_out())
2432+
else if (buf_pool.need_LRU_eviction())
23982433
{
23992434
buf_pool.page_cleaner_set_idle(false);
24002435
buf_pool.n_flush_inc();
@@ -2509,9 +2544,11 @@ static void buf_flush_page_cleaner()
25092544
MONITOR_FLUSH_ADAPTIVE_PAGES,
25102545
n_flushed);
25112546
}
2512-
else if (buf_flush_async_lsn <= oldest_lsn)
2547+
else if (buf_flush_async_lsn <= oldest_lsn &&
2548+
!buf_pool.need_LRU_eviction())
25132549
goto check_oldest_and_set_idle;
25142550

2551+
n= srv_max_io_capacity;
25152552
n= n >= n_flushed ? n - n_flushed : 0;
25162553
goto LRU_flush;
25172554
}

storage/innobase/buf/buf0lru.cc

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,6 @@ static constexpr ulint BUF_LRU_OLD_TOLERANCE = 20;
6060
frames in the buffer pool, we set this to TRUE */
6161
static bool buf_lru_switched_on_innodb_mon = false;
6262

63-
/** True if diagnostic message about difficult to find free blocks
64-
in the buffer pool has already been printed. */
65-
static bool buf_lru_free_blocks_error_printed;
66-
6763
/******************************************************************//**
6864
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
6965
and page_zip_decompress() operations. Based on the statistics,
@@ -408,6 +404,7 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
408404
buf_LRU_check_size_of_non_data_objects();
409405
buf_block_t* block;
410406

407+
IF_DBUG(static bool buf_lru_free_blocks_error_printed,);
411408
DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
412409
if (!buf_lru_free_blocks_error_printed) {
413410
n_iterations = 21;
@@ -417,9 +414,25 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
417414
/* If there is a block in the free list, take it */
418415
if ((block = buf_LRU_get_free_only()) != nullptr) {
419416
got_block:
417+
const ulint LRU_size = UT_LIST_GET_LEN(buf_pool.LRU);
418+
const ulint available = UT_LIST_GET_LEN(buf_pool.free);
419+
const ulint scan_depth = srv_LRU_scan_depth / 2;
420+
ut_ad(LRU_size <= BUF_LRU_MIN_LEN || available >= scan_depth
421+
|| buf_pool.need_LRU_eviction());
422+
420423
if (!have_mutex) {
421424
mysql_mutex_unlock(&buf_pool.mutex);
422425
}
426+
427+
if (UNIV_UNLIKELY(available < scan_depth)
428+
&& LRU_size > BUF_LRU_MIN_LEN) {
429+
mysql_mutex_lock(&buf_pool.flush_list_mutex);
430+
if (!buf_pool.page_cleaner_active()) {
431+
buf_pool.page_cleaner_wakeup(true);
432+
}
433+
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
434+
}
435+
423436
block->page.zip.clear();
424437
return block;
425438
}
@@ -445,10 +458,11 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
445458
if ((block = buf_LRU_get_free_only()) != nullptr) {
446459
goto got_block;
447460
}
461+
const bool wake = buf_pool.need_LRU_eviction();
448462
mysql_mutex_unlock(&buf_pool.mutex);
449463
mysql_mutex_lock(&buf_pool.flush_list_mutex);
450464
const auto n_flush = buf_pool.n_flush();
451-
if (!buf_pool.try_LRU_scan) {
465+
if (wake && !buf_pool.page_cleaner_active()) {
452466
buf_pool.page_cleaner_wakeup(true);
453467
}
454468
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
@@ -467,9 +481,10 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
467481
MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS );
468482
}
469483

470-
if (n_iterations == 21 && !buf_lru_free_blocks_error_printed
471-
&& srv_buf_pool_old_size == srv_buf_pool_size) {
472-
buf_lru_free_blocks_error_printed = true;
484+
if (n_iterations == 21
485+
&& srv_buf_pool_old_size == srv_buf_pool_size
486+
&& buf_pool.LRU_warned.test_and_set(std::memory_order_acquire)) {
487+
IF_DBUG(buf_lru_free_blocks_error_printed = true,);
473488
mysql_mutex_unlock(&buf_pool.mutex);
474489
ib::warn() << "Difficult to find free blocks in the buffer pool"
475490
" (" << n_iterations << " search iterations)! "

storage/innobase/include/buf0buf.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,10 +1488,8 @@ class buf_pool_t
14881488
n_chunks_new / 4 * chunks->size;
14891489
}
14901490

1491-
/** @return whether the buffer pool has run out */
1492-
TPOOL_SUPPRESS_TSAN
1493-
bool ran_out() const
1494-
{ return UNIV_UNLIKELY(!try_LRU_scan || !UT_LIST_GET_LEN(free)); }
1491+
/** @return whether the buffer pool is running low */
1492+
bool need_LRU_eviction() const;
14951493

14961494
/** @return whether the buffer pool is shrinking */
14971495
inline bool is_shrinking() const
@@ -1811,6 +1809,9 @@ class buf_pool_t
18111809
Set whenever the free list grows, along with a broadcast of done_free.
18121810
Protected by buf_pool.mutex. */
18131811
Atomic_relaxed<bool> try_LRU_scan;
1812+
/** Whether we have already warned about running out of buffer pool */
1813+
std::atomic_flag LRU_warned;
1814+
18141815
/* @} */
18151816

18161817
/** @name LRU replacement algorithm fields */

0 commit comments

Comments
 (0)