Skip to content

Commit

Permalink
Remove code duplication around buf_pool->flush_rbt
Browse files Browse the repository at this point in the history
The purpose of buf_pool->flush_rbt is to ensure that
buf_pool->flush_list is ordered by oldest_modification.
This should speed up multi-pass redo log application
(when the buffer pool is not large enough to accommodate
all pages that were modified since the latest log checkpoint).

The buf_pool->flush_rbt is not used after the redo log has
been applied. It could be better to always flush pages in
the ascending order of oldest_modification. Currently, whenever
a page is first modified, it will be moved to the start of the
buf_pool->flush_list, overtaking blocks whose oldest_modification
could be much older.

buf_flush_insert_sorted_into_flush_list(): Merge into
buf_flush_insert_into_flush_list().

buf_flush_recv_note_modification(): Remove.
The function buf_flush_note_modification() can be invoked instead.
  • Loading branch information
dr-m committed Jan 11, 2019
1 parent 301bd62 commit e7924a8
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 213 deletions.
159 changes: 33 additions & 126 deletions storage/innobase/buf/buf0flu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -427,137 +427,44 @@ buf_flush_insert_into_flush_list(
ut_ad(buf_page_mutex_own(block));

buf_flush_list_mutex_enter(buf_pool);

ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
<= lsn));

/* If we are in the recovery then we need to update the flush
red-black tree as well. */
if (buf_pool->flush_rbt != NULL) {
buf_flush_list_mutex_exit(buf_pool);
buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
return;
}

ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(!block->page.in_flush_list);

ut_d(block->page.in_flush_list = TRUE);
ut_ad(!block->page.oldest_modification);
block->page.oldest_modification = lsn;

UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);

UNIV_MEM_ASSERT_RW(block->page.zip
? block->page.zip.data : block->frame,
block->page.size.physical());
incr_flush_list_size_in_bytes(block, buf_pool);

#ifdef UNIV_DEBUG_VALGRIND
void* p;

if (block->page.size.is_compressed()) {
p = block->page.zip.data;
} else {
p = block->frame;
}

UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
#endif /* UNIV_DEBUG_VALGRIND */

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

buf_flush_list_mutex_exit(buf_pool);
}

/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn) /*!< in: oldest modification */
{
buf_page_t* prev_b;
buf_page_t* b;

ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(log_flush_order_mutex_own());
ut_ad(buf_page_mutex_own(block));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

buf_flush_list_mutex_enter(buf_pool);

/* The field in_LRU_list is protected by buf_pool->mutex, which
we are not holding. However, while a block is in the flush
list, it is dirty and cannot be discarded, not from the
page_hash or from the LRU list. At most, the uncompressed
page frame of a compressed block may be discarded or created
(copying the block->page to or from a buf_page_t that is
dynamically allocated from buf_buddy_alloc()). Because those
transitions hold block->mutex and the flush list mutex (via
buf_flush_relocate_on_flush_list()), there is no possibility
of a race condition in the assertions below. */
ut_ad(block->page.in_LRU_list);
ut_ad(block->page.in_page_hash);
/* buf_buddy_block_register() will take a block in the
BUF_BLOCK_MEMORY state, not a file page. */
ut_ad(!block->page.in_zip_hash);

ut_ad(!block->page.in_flush_list);
ut_d(block->page.in_flush_list = TRUE);
block->page.oldest_modification = lsn;

#ifdef UNIV_DEBUG_VALGRIND
void* p;

if (block->page.size.is_compressed()) {
p = block->page.zip.data;
} else {
p = block->frame;
}

UNIV_MEM_ASSERT_RW(p, block->page.size.physical());
#endif /* UNIV_DEBUG_VALGRIND */

prev_b = NULL;

/* For the most part when this function is called the flush_rbt
should not be NULL. In a very rare boundary case it is possible
that the flush_rbt has already been freed by the recovery thread
before the last page was hooked up in the flush_list by the
io-handler thread. In that case we'll just do a simple
linear search in the else block. */
if (buf_pool->flush_rbt != NULL) {

prev_b = buf_flush_insert_in_flush_rbt(&block->page);

} else {

b = UT_LIST_GET_FIRST(buf_pool->flush_list);

while (b != NULL && b->oldest_modification
> block->page.oldest_modification) {

ut_ad(b->in_flush_list);
prev_b = b;
b = UT_LIST_GET_NEXT(list, b);
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
/* The field in_LRU_list is protected by buf_pool->mutex, which
we are not holding. However, while a block is in the flush
list, it is dirty and cannot be discarded, not from the
page_hash or from the LRU list. At most, the uncompressed
page frame of a compressed block may be discarded or created
(copying the block->page to or from a buf_page_t that is
dynamically allocated from buf_buddy_alloc()). Because those
transitions hold block->mutex and the flush list mutex (via
buf_flush_relocate_on_flush_list()), there is no possibility
of a race condition in the assertions below. */
ut_ad(block->page.in_LRU_list);
ut_ad(block->page.in_page_hash);
/* buf_buddy_block_register() will take a block in the
BUF_BLOCK_MEMORY state, not a file page. */
ut_ad(!block->page.in_zip_hash);

if (buf_page_t* prev_b =
buf_flush_insert_in_flush_rbt(&block->page)) {
UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
goto func_exit;
}
}

if (prev_b == NULL) {
UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
} else {
UT_LIST_INSERT_AFTER(buf_pool->flush_list, prev_b, &block->page);
}

incr_flush_list_size_in_bytes(block, buf_pool);

UT_LIST_ADD_FIRST(buf_pool->flush_list, &block->page);
func_exit:
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low(buf_pool));
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

buf_flush_list_mutex_exit(buf_pool);
Expand Down Expand Up @@ -686,7 +593,7 @@ buf_flush_remove(
}

/* If the flush_rbt is active then delete from there as well. */
if (buf_pool->flush_rbt != NULL) {
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
}

Expand Down Expand Up @@ -754,7 +661,7 @@ buf_flush_relocate_on_flush_list(

/* If recovery is active we must swap the control blocks in
the flush_rbt as well. */
if (buf_pool->flush_rbt != NULL) {
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
prev_b = buf_flush_insert_in_flush_rbt(dpage);
}
Expand Down Expand Up @@ -3600,7 +3507,7 @@ buf_flush_validate_low(
/* If we are in recovery mode i.e.: flush_rbt != NULL
then each block in the flush_list must also be present
in the flush_rbt. */
if (buf_pool->flush_rbt != NULL) {
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
rnode = rbt_first(buf_pool->flush_rbt);
}

Expand All @@ -3621,7 +3528,7 @@ buf_flush_validate_low(
|| buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
ut_a(om > 0);

if (buf_pool->flush_rbt != NULL) {
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_page_t** prpage;

ut_a(rnode != NULL);
Expand Down
12 changes: 0 additions & 12 deletions storage/innobase/include/buf0flu.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,18 +191,6 @@ buf_flush_note_modification(
lsn_t end_lsn, /*!< in: end lsn of the last mtr in the
set of mtr's */
FlushObserver* observer); /*!< in: flush observer */

/********************************************************************//**
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /*!< in: block which is modified */
lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
lsn_t end_lsn); /*!< in: end lsn of the last mtr in the
set of mtr's */
/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., transition FILE_PAGE => NOT_USED allowed.
Expand Down
79 changes: 5 additions & 74 deletions storage/innobase/include/buf0flu.ic
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,6 @@ buf_flush_insert_into_flush_list(
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn); /*!< in: oldest modification */

/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
buf_block_t* block, /*!< in/out: block which is modified */
lsn_t lsn); /*!< in: oldest modification */

/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it is not
Expand All @@ -63,24 +52,11 @@ buf_flush_note_modification(
modified this block */
FlushObserver* observer) /*!< in: flush observer */
{
#ifdef UNIV_DEBUG
{
/* Allow write to proceed to shared temporary tablespace
in read-only mode. */
ut_ad(!srv_read_only_mode
|| fsp_is_system_temporary(block->page.id.space()));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);

buf_pool_t* buf_pool = buf_pool_from_block(block);

ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));
}
#endif /* UNIV_DEBUG */

mutex_enter(&block->mutex);

ut_ad(!srv_read_only_mode
|| fsp_is_system_temporary(block->page.id.space()));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
ut_ad(block->page.newest_modification <= end_lsn);
block->page.newest_modification = end_lsn;

Expand All @@ -98,52 +74,7 @@ buf_flush_note_modification(
ut_ad(block->page.oldest_modification <= start_lsn);
}

buf_page_mutex_exit(block);
mutex_exit(&block->mutex);

srv_stats.buf_pool_write_requests.inc();
}

/********************************************************************//**
This function should be called when recovery has modified a buffer page.
It records end_lsn as the newest modification of the page and, if the page
has no oldest_modification yet, inserts the block into the flush list via
buf_flush_insert_sorted_into_flush_list() with start_lsn as its oldest
modification.
NOTE(review): the visible caller, recv_recover_page(), invokes this while
holding the log flush order mutex; presumably every caller must do so --
confirm. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /*!< in: block which is modified */
lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
lsn_t end_lsn) /*!< in: end lsn of the last mtr in the
set of mtr's */
{
#ifdef UNIV_DEBUG
{
/* Debug-build sanity checks: the server is not in read-only mode,
and the block is a file page with a nonzero buf_fix_count. */
ut_ad(!srv_read_only_mode);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);

buf_pool_t* buf_pool = buf_pool_from_block(block);

/* The caller must hold neither the buffer pool mutex nor the
flush list mutex of the block's buffer pool instance. */
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(!buf_flush_list_mutex_own(buf_pool));

/* start_lsn must be nonzero, and end_lsn must not be older than
the newest modification already recorded for the page. */
ut_ad(start_lsn != 0);
ut_ad(block->page.newest_modification <= end_lsn);
}
#endif /* UNIV_DEBUG */

/* The modification LSN fields are updated between
buf_page_mutex_enter() and buf_page_mutex_exit() on this block. */
buf_page_mutex_enter(block);

block->page.newest_modification = end_lsn;

if (!block->page.oldest_modification) {
/* No oldest_modification recorded yet: hand the block to the
sorted flush list insertion, which sets oldest_modification
to start_lsn. */
buf_pool_t* buf_pool = buf_pool_from_block(block);

buf_flush_insert_sorted_into_flush_list(
buf_pool, block, start_lsn);
} else {
/* An oldest_modification is already recorded; it must not be
newer than the start of this modification. */
ut_ad(block->page.oldest_modification <= start_lsn);
}

buf_page_mutex_exit(block);

}
2 changes: 1 addition & 1 deletion storage/innobase/log/log0recv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1925,7 +1925,7 @@ recv_recover_page(bool just_read_in, buf_block_t* block)

if (start_lsn) {
log_flush_order_mutex_enter();
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
buf_flush_note_modification(block, start_lsn, end_lsn, NULL);
log_flush_order_mutex_exit();
}

Expand Down

0 comments on commit e7924a8

Please sign in to comment.