Skip to content

Commit

Permalink
MDEV-22027 Assertion oldest_lsn >= log_sys.last_checkpoint_lsn failed
Browse files Browse the repository at this point in the history
log_buf_pool_get_oldest_modification(): Acquire
log_sys_t::flush_order_mutex in order to prevent a race condition
that was introduced in
commit 1a6f708 (MDEV-15058).

Before that change, log_buf_pool_get_oldest_modification()
was protected by both log_sys.mutex and log_sys.flush_order_mutex
like it was supposed to be ever since
commit a52c482 (MySQL 5.5.10).

buf_pool_t::get_oldest_modification(): Replaces
buf_pool_get_oldest_modification(), to emphasize that
log_sys.flush_order_mutex must be acquired by the caller if needed.

log_close(): Invoke log_buf_pool_get_oldest_modification()
in order to ensure a clean shutdown.

The scenario of the race condition is as follows:

1. The buffer pool is clean (no writes are pending).
2. mtr_add_dirtied_pages_to_flush_list() releases log_sys.mutex.
3. log_buf_pool_get_oldest_modification() observes that the
buffer pool is clean and returns log_sys.lsn.
4. log_checkpoint() completes, writing a wrong checkpoint header
according to which everything up to log_sys.lsn was clean.
5. mtr_add_dirtied_pages_to_flush_list() completes the execution
of mtr_memo_note_modifications(), releases the page latches and
the flush_order_mutex.
6. On a subsequent log_checkpoint(), the assertion could fail
if the page modifications had not been flushed yet.

The failing assertion (which is valid) was added in MySQL 5.7
mysql/mysql-server@5c6c6ec
and merged to MariaDB Server 10.2.2 in
commit fec844a.
  • Loading branch information
dr-m committed Jun 2, 2020
1 parent 661ebd4 commit 0d6d63e
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 54 deletions.
44 changes: 20 additions & 24 deletions storage/innobase/buf/buf0buf.cc
Expand Up @@ -483,31 +483,27 @@ static bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)

/**
@return the smallest oldest_modification lsn for any page.
@retval 0 if all modified persistent pages have been flushed */
lsn_t
buf_pool_get_oldest_modification()
@retval 0 if all modified persistent pages have been flushed */
lsn_t buf_pool_t::get_oldest_modification()
{
mutex_enter(&buf_pool.flush_list_mutex);

buf_page_t* bpage;

/* FIXME: Keep temporary tablespace pages in a separate flush
list. We would only need to write out temporary pages if the
page is about to be evicted from the buffer pool, and the page
contents is still needed (the page has not been freed). */
for (bpage = UT_LIST_GET_LAST(buf_pool.flush_list);
bpage != NULL && fsp_is_system_temporary(bpage->id.space());
bpage = UT_LIST_GET_PREV(list, bpage)) {
ut_ad(bpage->in_flush_list);
}

lsn_t oldest_lsn = bpage ? bpage->oldest_modification : 0;
mutex_exit(&buf_pool.flush_list_mutex);

/* The returned answer may be out of date: the flush_list can
change after the mutex has been released. */

return(oldest_lsn);
mutex_enter(&flush_list_mutex);

/* FIXME: Keep temporary tablespace pages in a separate flush
list. We would only need to write out temporary pages if the
page is about to be evicted from the buffer pool, and the page
contents is still needed (the page has not been freed). */
const buf_page_t *bpage;
for (bpage= UT_LIST_GET_LAST(flush_list);
bpage && fsp_is_system_temporary(bpage->id.space());
bpage= UT_LIST_GET_PREV(list, bpage))
ut_ad(bpage->in_flush_list);

lsn_t oldest_lsn= bpage ? bpage->oldest_modification : 0;
mutex_exit(&flush_list_mutex);

/* The result may become stale as soon as we released the mutex.
On log checkpoint, also log_sys.flush_order_mutex will be needed. */
return oldest_lsn;
}

/** Allocate a buffer block.
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/buf/buf0flu.cc
Expand Up @@ -2441,7 +2441,7 @@ page_cleaner_flush_pages_recommendation(ulint last_pages_in)
sum_pages = 0;
}

oldest_lsn = buf_pool_get_oldest_modification();
oldest_lsn = buf_pool.get_oldest_modification();

ut_ad(oldest_lsn <= log_get_lsn());

Expand Down
10 changes: 5 additions & 5 deletions storage/innobase/include/buf0buf.h
Expand Up @@ -198,11 +198,6 @@ UNIV_INLINE
ulint
buf_pool_get_curr_size(void);
/*========================*/
/**
@return the smallest oldest_modification lsn for any page.
@retval 0 if all modified persistent pages have been flushed */
lsn_t
buf_pool_get_oldest_modification();

/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
Expand Down Expand Up @@ -1868,6 +1863,11 @@ class buf_pool_t
bool is_block_lock(const BPageLock *l) const
{ return is_block_field(reinterpret_cast<const void*>(l)); }

/**
@return the smallest oldest_modification lsn for any page
@retval 0 if all modified persistent pages have been flushed */
lsn_t get_oldest_modification();

/** Determine if a buffer block was created by chunk_t::create().
@param block block descriptor (not dereferenced)
@return whether block has been created by chunk_t::create() */
Expand Down
6 changes: 3 additions & 3 deletions storage/innobase/include/log0log.h
Expand Up @@ -139,7 +139,7 @@ log_get_max_modified_age_async(void);
/*================================*/

/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
and lsn - buf_pool.get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@retval true on success
@retval false if the smallest log is too small to
Expand Down Expand Up @@ -667,13 +667,13 @@ struct log_t{
lsn_t max_modified_age_async;
/*!< when this recommended
value for lsn -
buf_pool_get_oldest_modification()
buf_pool.get_oldest_modification()
is exceeded, we start an
asynchronous preflush of pool pages */
lsn_t max_modified_age_sync;
/*!< when this recommended
value for lsn -
buf_pool_get_oldest_modification()
buf_pool.get_oldest_modification()
is exceeded, we start a
synchronous preflush of pool pages */
lsn_t max_checkpoint_age_async;
Expand Down
30 changes: 10 additions & 20 deletions storage/innobase/log/log0log.cc
Expand Up @@ -91,27 +91,17 @@ should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
the previous */
#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8

/****************************************************************//**
Returns the oldest modified block lsn in the pool, or log_sys.lsn if none
exists.
/** Return the oldest modified LSN in buf_pool.flush_list,
or the latest LSN if all pages are clean.
@return LSN of oldest modification */
static
lsn_t
log_buf_pool_get_oldest_modification(void)
/*======================================*/
static lsn_t log_buf_pool_get_oldest_modification()
{
lsn_t lsn;

ut_ad(log_mutex_own());

lsn = buf_pool_get_oldest_modification();
ut_ad(log_mutex_own());
log_flush_order_mutex_enter();
lsn_t lsn= buf_pool.get_oldest_modification();
log_flush_order_mutex_exit();

if (!lsn) {

lsn = log_sys.get_lsn();
}

return(lsn);
return lsn ? lsn : log_sys.get_lsn();
}

/** Extends the log buffer.
Expand Down Expand Up @@ -419,7 +409,7 @@ log_close(void)
goto function_exit;
}

oldest_lsn = buf_pool_get_oldest_modification();
oldest_lsn = log_buf_pool_get_oldest_modification();

if (!oldest_lsn
|| lsn - oldest_lsn > log_sys.max_modified_age_sync
Expand All @@ -432,7 +422,7 @@ log_close(void)
}

/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
and lsn - buf_pool.get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@retval true on success
@retval false if the smallest log group is too small to
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/srv/srv0mon.cc
Expand Up @@ -2002,7 +2002,7 @@ srv_mon_process_existing_counter(
break;

case MONITOR_OVLD_BUF_OLDEST_LSN:
value = (mon_type_t) buf_pool_get_oldest_modification();
value = (mon_type_t) buf_pool.get_oldest_modification();
break;

case MONITOR_OVLD_LSN_CHECKPOINT:
Expand Down

0 comments on commit 0d6d63e

Please sign in to comment.