Skip to content
Permalink
Browse files
MDEV-26200 buf_pool.flush_list corrupted by buffer pool resizing or ROW_FORMAT=COMPRESSED

The lazy deletion of clean blocks from buf_pool.flush_list that was
introduced in commit 6441bc6 (MDEV-25113)
introduced a race condition around the function
buf_flush_relocate_on_flush_list().

The test innodb_zip.wl5522_debug_zip as well as the buffer pool
resizing tests would occasionally fail in debug builds due to
buf_pool.flush_list.count disagreeing with the actual length of the
doubly-linked list.

The safe procedure for relocating a block in buf_pool.flush_list should be
as follows, now that we implement lazy deletion from buf_pool.flush_list:

1. Acquire buf_pool.mutex.
2. Acquire the exclusive buf_pool.page_hash.latch.
3. Acquire buf_pool.flush_list_mutex.
4. Copy the block descriptor.
5. Invoke buf_flush_relocate_on_flush_list().
6. Release buf_pool.flush_list_mutex.

buf_flush_relocate_on_flush_list(): Assert that
buf_pool.flush_list_mutex is being held. Invoke
buf_page_t::oldest_modification() only once, using
std::memory_order_relaxed, now that the mutex protects us.

buf_LRU_free_page(), buf_LRU_block_remove_hashed(): Avoid
an unlock-lock cycle on hash_lock. (We must not acquire hash_lock
while already holding buf_pool.flush_list_mutex, because that
could lead to a deadlock due to latching order violation.)
  • Loading branch information
dr-m committed Jul 22, 2021
1 parent 316a8ce commit bf435a3
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 26 deletions.
@@ -1566,6 +1566,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
if (block->page.can_relocate()) {
memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(
new_block->frame, block->frame, srv_page_size);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
new (&new_block->page) buf_page_t(block->page);

/* relocate LRU list */
@@ -1625,6 +1626,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
buf_flush_relocate_on_flush_list(&block->page,
&new_block->page);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
block->page.set_corrupt_id();

/* set other flags of buf_block_t */
@@ -3131,12 +3133,14 @@ buf_page_get_low(
/* Note: this is the uncompressed block and it is not
accessible by other threads yet because it is not in
any list or hash table */
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_relocate(bpage, &block->page);

/* Set after buf_relocate(). */
block->page.set_buf_fix_count(1);

buf_flush_relocate_on_flush_list(bpage, &block->page);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);

/* Buffer-fix, I/O-fix, and X-latch the block
for the duration of the decompression.
@@ -3646,8 +3650,10 @@ buf_page_create(fil_space_t *space, uint32_t offset,
}

rw_lock_x_lock(&free_block->lock);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_relocate(&block->page, &free_block->page);
buf_flush_relocate_on_flush_list(&block->page, &free_block->page);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);

free_block->page.set_state(BUF_BLOCK_FILE_PAGE);
buf_unzip_LRU_add_block(free_block, FALSE);
@@ -286,43 +286,29 @@ buf_flush_relocate_on_flush_list(
{
buf_page_t* prev;

mysql_mutex_assert_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
ut_ad(!fsp_is_system_temporary(bpage->id().space()));

const lsn_t lsn = bpage->oldest_modification_acquire();
const lsn_t lsn = bpage->oldest_modification();

if (!lsn) {
return;
}

ut_ad(lsn == 1 || lsn > 2);

mysql_mutex_lock(&buf_pool.flush_list_mutex);

/* FIXME: Can we avoid holding buf_pool.mutex here? */
ut_ad(dpage->oldest_modification() == lsn);

if (ut_d(const lsn_t o_lsn =) bpage->oldest_modification()) {
ut_ad(o_lsn == lsn);
/* Important that we adjust the hazard pointer before removing
the bpage from the flush list. */
buf_pool.flush_hp.adjust(bpage);

/* Important that we adjust the hazard pointer before removing
the bpage from the flush list. */
buf_pool.flush_hp.adjust(bpage);
prev = UT_LIST_GET_PREV(list, bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);

prev = UT_LIST_GET_PREV(list, bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);

bpage->clear_oldest_modification();
} else {
/* bpage was removed from buf_pool.flush_list
since we last checked, and before we acquired
buf_pool.flush_list_mutex. */
goto was_clean;
}
bpage->clear_oldest_modification();

if (lsn == 1) {
buf_pool.stat.flush_list_bytes -= dpage->physical_size();
was_clean:
dpage->list.prev = nullptr;
dpage->list.next = nullptr;
dpage->clear_oldest_modification();
@@ -334,7 +320,6 @@ buf_flush_relocate_on_flush_list(
}

ut_d(buf_flush_validate_low());
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}

/** Complete write of a file page from buf_pool.
@@ -845,6 +845,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
} else if (bpage->state() == BUF_BLOCK_FILE_PAGE) {
b = buf_page_alloc_descriptor();
ut_a(b);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
new (b) buf_page_t(*bpage);
b->set_state(BUF_BLOCK_ZIP_PAGE);
}
@@ -859,6 +860,8 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
ut_ad(bpage->can_relocate());

if (!buf_LRU_block_remove_hashed(bpage, id, hash_lock, zip)) {
ut_ad(!b);
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
return(true);
}

@@ -872,8 +875,6 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
if (UNIV_LIKELY_NULL(b)) {
buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);

hash_lock->write_lock();

ut_ad(!buf_pool.page_hash_get_low(id, fold));
ut_ad(b->zip_size());

@@ -940,6 +941,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
}

buf_flush_relocate_on_flush_list(bpage, b);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);

bpage->zip.data = nullptr;

@@ -950,6 +952,8 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
hash_lock. */
b->set_io_fix(BUF_IO_PIN);
hash_lock->write_unlock();
} else if (!zip) {
hash_lock->write_unlock();
}

buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
@@ -1182,6 +1186,10 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
MEM_UNDEFINED(((buf_block_t*) bpage)->frame, srv_page_size);
bpage->set_state(BUF_BLOCK_REMOVE_HASH);

if (!zip) {
return true;
}

/* Question: If we release hash_lock here
then what protects us against:
1) Some other thread buffer fixing this page
@@ -1203,7 +1211,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
page_hash. */
hash_lock->write_unlock();

if (zip && bpage->zip.data) {
if (bpage->zip.data) {
/* Free the compressed page. */
void* data = bpage->zip.data;
bpage->zip.data = NULL;

0 comments on commit bf435a3

Please sign in to comment.