Skip to content

Commit 6441bc6

Browse files
committed
MDEV-25113: Introduce a page cleaner mode before 'furious flush'
MDEV-23855 changed how the page cleaner is signaled by user threads. If a threshold is exceeded, a mini-transaction commit would invoke buf_flush_ahead() in order to initiate page flushing before all writers would eventually grind to a halt in log_free_check(), waiting for the checkpoint age to decrease. However, buf_flush_ahead() would always initiate 'furious flushing', making the buf_flush_page_cleaner thread write innodb_io_capacity_max pages per batch, without sleeping between batches, until the limit LSN is reached. Because this could saturate the I/O subsystem, system throughput could drop significantly during these 'furious flushing' spikes. With this change, we introduce a gentler version of flush-ahead, which would write innodb_io_capacity_max pages per second until the 'soft limit' is reached. buf_flush_ahead(): Add a parameter to specify whether furious flushing is requested. buf_flush_async_lsn: Similar to buf_flush_sync_lsn, a limit for the less intrusive flushing. buf_flush_page_cleaner(): Keep working until buf_flush_async_lsn has been reached. log_close(): Suppress a warning message in the event that a new log is being created during startup, when old logs did not exist. Return what type of page cleaning will be needed. mtr_t::finish_write(): Also when m_log.is_small(), invoke log_close(). Return what type of page cleaning will be needed. mtr_t::commit(): Invoke buf_flush_ahead() based on the return value of mtr_t::finish_write().
1 parent 22b62ed commit 6441bc6

File tree

4 files changed

+73
-42
lines changed

4 files changed

+73
-42
lines changed

storage/innobase/buf/buf0flu.cc

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,11 @@ static constexpr ulint buf_flush_lsn_scan_factor = 3;
6363
/** Average redo generation rate */
6464
static lsn_t lsn_avg_rate = 0;
6565

66-
/** Target oldest_modification for the page cleaner; writes are protected by
67-
buf_pool.flush_list_mutex */
66+
/** Target oldest_modification for the page cleaner background flushing;
67+
writes are protected by buf_pool.flush_list_mutex */
68+
static Atomic_relaxed<lsn_t> buf_flush_async_lsn;
69+
/** Target oldest_modification for the page cleaner furious flushing;
70+
writes are protected by buf_pool.flush_list_mutex */
6871
static Atomic_relaxed<lsn_t> buf_flush_sync_lsn;
6972

7073
#ifdef UNIV_PFS_THREAD
@@ -1905,24 +1908,26 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn)
19051908
}
19061909
}
19071910

1908-
/** If innodb_flush_sync=ON, initiate a furious flush.
1909-
@param lsn buf_pool.get_oldest_modification(LSN_MAX) target */
1910-
void buf_flush_ahead(lsn_t lsn)
1911+
/** Initiate more eager page flushing if the log checkpoint age is too old.
1912+
@param lsn buf_pool.get_oldest_modification(LSN_MAX) target
1913+
@param furious true=furious flushing, false=limit to innodb_io_capacity */
1914+
ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious)
19111915
{
19121916
mysql_mutex_assert_not_owner(&log_sys.mutex);
19131917
ut_ad(!srv_read_only_mode);
19141918

19151919
if (recv_recovery_is_on())
19161920
recv_sys.apply(true);
19171921

1918-
if (buf_flush_sync_lsn < lsn)
1922+
Atomic_relaxed<lsn_t> &limit= furious
1923+
? buf_flush_sync_lsn : buf_flush_async_lsn;
1924+
1925+
if (limit < lsn)
19191926
{
19201927
mysql_mutex_lock(&buf_pool.flush_list_mutex);
1921-
if (buf_flush_sync_lsn < lsn)
1922-
{
1923-
buf_flush_sync_lsn= lsn;
1924-
pthread_cond_signal(&buf_pool.do_flush_list);
1925-
}
1928+
if (limit < lsn)
1929+
limit= lsn;
1930+
pthread_cond_signal(&buf_pool.do_flush_list);
19261931
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
19271932
}
19281933
}
@@ -1997,6 +2002,8 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
19972002

19982003
if (measure >= target)
19992004
buf_flush_sync_lsn= 0;
2005+
else if (measure >= buf_flush_async_lsn)
2006+
buf_flush_async_lsn= 0;
20002007

20012008
/* wake up buf_flush_wait_flushed() */
20022009
pthread_cond_broadcast(&buf_pool.done_flush_list);
@@ -2016,7 +2023,7 @@ static bool af_needed_for_redo(lsn_t oldest_lsn)
20162023
{
20172024
lsn_t age= (log_sys.get_lsn() - oldest_lsn);
20182025
lsn_t af_lwm= static_cast<lsn_t>(srv_adaptive_flushing_lwm *
2019-
static_cast<double>(log_sys.log_capacity) / 100);
2026+
static_cast<double>(log_sys.log_capacity) / 100);
20202027

20212028
/* if age > af_lwm adaptive flushing is recommended */
20222029
return (age > af_lwm);
@@ -2240,6 +2247,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
22402247

22412248
set_timespec(abstime, 1);
22422249

2250+
lsn_t soft_lsn_limit= buf_flush_async_lsn;
22432251
lsn_limit= buf_flush_sync_lsn;
22442252

22452253
if (UNIV_UNLIKELY(lsn_limit != 0))
@@ -2261,6 +2269,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
22612269
pthread_cond_broadcast(&buf_pool.done_flush_list);
22622270
}
22632271
unemployed:
2272+
buf_flush_async_lsn= 0;
22642273
buf_pool.page_cleaner_set_idle(true);
22652274
continue;
22662275
}
@@ -2275,7 +2284,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
22752284

22762285
bool idle_flush= false;
22772286

2278-
if (lsn_limit);
2287+
if (lsn_limit || soft_lsn_limit);
22792288
else if (af_needed_for_redo(oldest_lsn));
22802289
else if (srv_max_dirty_pages_pct_lwm != 0.0)
22812290
{
@@ -2300,11 +2309,16 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
23002309
goto unemployed;
23012310

23022311
if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
2303-
buf_flush_sync_lsn= 0;
2312+
lsn_limit= buf_flush_sync_lsn= 0;
2313+
if (UNIV_UNLIKELY(soft_lsn_limit != 0) && oldest_lsn >= soft_lsn_limit)
2314+
soft_lsn_limit= buf_flush_async_lsn= 0;
23042315

23052316
buf_pool.page_cleaner_set_idle(false);
23062317
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
23072318

2319+
if (!lsn_limit)
2320+
lsn_limit= soft_lsn_limit;
2321+
23082322
ulint n_flushed;
23092323

23102324
if (UNIV_UNLIKELY(lsn_limit != 0))
@@ -2355,7 +2369,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
23552369
goto do_checkpoint;
23562370
}
23572371
}
2358-
else
2372+
else if (buf_flush_async_lsn <= oldest_lsn)
23592373
{
23602374
mysql_mutex_lock(&buf_pool.flush_list_mutex);
23612375
goto unemployed;
@@ -2410,6 +2424,7 @@ ATTRIBUTE_COLD void buf_flush_page_cleaner_init()
24102424
ut_ad(srv_operation == SRV_OPERATION_NORMAL ||
24112425
srv_operation == SRV_OPERATION_RESTORE ||
24122426
srv_operation == SRV_OPERATION_RESTORE_EXPORT);
2427+
buf_flush_async_lsn= 0;
24132428
buf_flush_sync_lsn= 0;
24142429
buf_page_cleaner_is_active= true;
24152430
os_thread_create(buf_flush_page_cleaner);

storage/innobase/include/buf0flu.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,10 @@ void buf_flush_wait_batch_end(bool lru);
111111
/** Wait until all persistent pages are flushed up to a limit.
112112
@param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */
113113
ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn);
114-
/** If innodb_flush_sync=ON, initiate a furious flush.
115-
@param lsn buf_pool.get_oldest_modification(LSN_MAX) target */
116-
void buf_flush_ahead(lsn_t lsn);
114+
/** Initiate more eager page flushing if the log checkpoint age is too old.
115+
@param lsn buf_pool.get_oldest_modification(LSN_MAX) target
116+
@param furious true=furious flushing, false=limit to innodb_io_capacity */
117+
ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious);
117118

118119
/********************************************************************//**
119120
This function should be called at a mini-transaction commit, if a page was

storage/innobase/include/mtr0mtr.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,17 @@ struct mtr_t {
588588
@return number of buffer count added by this mtr */
589589
uint32_t get_fix_count(const buf_block_t *block) const;
590590

591+
/** type of page flushing is needed during commit() */
592+
enum page_flush_ahead
593+
{
594+
/** no need to trigger page cleaner */
595+
PAGE_FLUSH_NO= 0,
596+
/** asynchronous flushing is needed */
597+
PAGE_FLUSH_ASYNC,
598+
/** furious flushing is needed */
599+
PAGE_FLUSH_SYNC
600+
};
601+
591602
private:
592603
/** Log a write of a byte string to a page.
593604
@param block buffer page
@@ -621,7 +632,7 @@ struct mtr_t {
621632
/** Append the redo log records to the redo log buffer.
622633
@param len number of bytes to write
623634
@return {start_lsn,flush_ahead} */
624-
inline std::pair<lsn_t,bool> finish_write(ulint len);
635+
inline std::pair<lsn_t,page_flush_ahead> finish_write(ulint len);
625636

626637
/** Release the resources */
627638
inline void release_resources();

storage/innobase/mtr/mtr0mtr.cc

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -402,12 +402,12 @@ void mtr_t::commit()
402402
{
403403
ut_ad(!srv_read_only_mode || m_log_mode == MTR_LOG_NO_REDO);
404404

405-
std::pair<lsn_t,bool> lsns;
405+
std::pair<lsn_t,page_flush_ahead> lsns;
406406

407407
if (const ulint len= prepare_write())
408408
lsns= finish_write(len);
409409
else
410-
lsns= { m_commit_lsn, false };
410+
lsns= { m_commit_lsn, PAGE_FLUSH_NO };
411411

412412
if (m_made_dirty)
413413
mysql_mutex_lock(&log_sys.flush_order_mutex);
@@ -447,8 +447,8 @@ void mtr_t::commit()
447447

448448
m_memo.for_each_block_in_reverse(CIterate<ReleaseLatches>());
449449

450-
if (lsns.second)
451-
buf_flush_ahead(m_commit_lsn);
450+
if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO))
451+
buf_flush_ahead(m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC);
452452

453453
if (m_made_dirty)
454454
srv_stats.log_write_requests.inc();
@@ -754,7 +754,7 @@ static void log_write_low(const void *str, size_t size)
754754

755755
/** Close the log at mini-transaction commit.
756756
@return whether buffer pool flushing is needed */
757-
static bool log_close(lsn_t lsn)
757+
static mtr_t::page_flush_ahead log_close(lsn_t lsn)
758758
{
759759
mysql_mutex_assert_owner(&log_sys.mutex);
760760
ut_ad(lsn == log_sys.get_lsn());
@@ -777,7 +777,9 @@ static bool log_close(lsn_t lsn)
777777

778778
const lsn_t checkpoint_age= lsn - log_sys.last_checkpoint_lsn;
779779

780-
if (UNIV_UNLIKELY(checkpoint_age >= log_sys.log_capacity))
780+
if (UNIV_UNLIKELY(checkpoint_age >= log_sys.log_capacity) &&
781+
/* silence message on create_log_file() after the log had been deleted */
782+
checkpoint_age != lsn)
781783
{
782784
time_t t= time(nullptr);
783785
if (!log_close_warned || difftime(t, log_close_warn_time) > 15)
@@ -786,15 +788,17 @@ static bool log_close(lsn_t lsn)
786788
log_close_warn_time= t;
787789

788790
ib::error() << "The age of the last checkpoint is " << checkpoint_age
789-
<< ", which exceeds the log capacity "
790-
<< log_sys.log_capacity << ".";
791+
<< ", which exceeds the log capacity "
792+
<< log_sys.log_capacity << ".";
791793
}
792794
}
795+
else if (UNIV_LIKELY(checkpoint_age <= log_sys.max_modified_age_async))
796+
return mtr_t::PAGE_FLUSH_NO;
793797
else if (UNIV_LIKELY(checkpoint_age <= log_sys.max_checkpoint_age))
794-
return false;
798+
return mtr_t::PAGE_FLUSH_ASYNC;
795799

796800
log_sys.set_check_flush_or_checkpoint();
797-
return true;
801+
return mtr_t::PAGE_FLUSH_SYNC;
798802
}
799803

800804
/** Write the block contents to the REDO log */
@@ -858,8 +862,8 @@ inline ulint mtr_t::prepare_write()
858862

859863
/** Append the redo log records to the redo log buffer.
860864
@param len number of bytes to write
861-
@return {start_lsn,flush_ahead_lsn} */
862-
inline std::pair<lsn_t,bool> mtr_t::finish_write(ulint len)
865+
@return {start_lsn,flush_ahead} */
866+
inline std::pair<lsn_t,mtr_t::page_flush_ahead> mtr_t::finish_write(ulint len)
863867
{
864868
ut_ad(m_log_mode == MTR_LOG_ALL);
865869
mysql_mutex_assert_owner(&log_sys.mutex);
@@ -875,19 +879,19 @@ inline std::pair<lsn_t,bool> mtr_t::finish_write(ulint len)
875879
m_commit_lsn = log_reserve_and_write_fast(front->begin(), len,
876880
&start_lsn);
877881

878-
if (m_commit_lsn) {
879-
return std::make_pair(start_lsn, false);
882+
if (!m_commit_lsn) {
883+
goto piecewise;
880884
}
885+
} else {
886+
piecewise:
887+
/* Open the database log for log_write_low */
888+
start_lsn = log_reserve_and_open(len);
889+
mtr_write_log write_log;
890+
m_log.for_each_block(write_log);
891+
m_commit_lsn = log_sys.get_lsn();
881892
}
882-
883-
/* Open the database log for log_write_low */
884-
start_lsn = log_reserve_and_open(len);
885-
886-
mtr_write_log write_log;
887-
m_log.for_each_block(write_log);
888-
m_commit_lsn = log_sys.get_lsn();
889-
bool flush = log_close(m_commit_lsn);
890-
DBUG_EXECUTE_IF("ib_log_flush_ahead", flush=true;);
893+
page_flush_ahead flush= log_close(m_commit_lsn);
894+
DBUG_EXECUTE_IF("ib_log_flush_ahead", flush = PAGE_FLUSH_SYNC;);
891895

892896
return std::make_pair(start_lsn, flush);
893897
}

0 commit comments

Comments
 (0)