Skip to content

Commit

Permalink
MDEV-12227 Defer writes to the InnoDB temporary tablespace
Browse files Browse the repository at this point in the history
The flushing of the InnoDB temporary tablespace is unnecessarily
tied to the write-ahead redo logging and redo log checkpoints,
which must be tied to the page writes of persistent tablespaces.

Let us simply omit any pages of temporary tables from buf_pool.flush_list.
In this way, log checkpoints will never incur any 'collateral damage' of
writing out modified pages of temporary tables.

After this change, pages of the temporary tablespace can only be written
out by buf_flush_lists(n_pages,0) as part of LRU eviction. Hopefully,
most of the time, that code will never be executed, and instead, the
temporary pages will be evicted by buf_release_freed_page() without
ever being written back to the temporary tablespace file.

This should improve the efficiency of the checkpoint flushing and
the buf_flush_page_cleaner thread.

Reviewed by: Vladislav Vaintroub
  • Loading branch information
dr-m committed Dec 9, 2020
1 parent ea21d63 commit 5eb5395
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 42 deletions.
2 changes: 1 addition & 1 deletion mysql-test/suite/innodb/r/autoinc_debug.result
Expand Up @@ -4,7 +4,7 @@ SET AUTO_INCREMENT_INCREMENT = 1;
# MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
SET GLOBAL innodb_flush_sync=OFF;
# For the server to hang, we must have pages for temporary tables
# (and this is only effective as long as MDEV-12227 is not fixed).
# (and the bug depended on MDEV-12227 not being fixed).
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
SET debug_dbug= '+d,ib_log_flush_ahead';
INSERT INTO t1 VALUES(NULL);
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/suite/innodb/t/autoinc_debug.test
Expand Up @@ -12,7 +12,7 @@ SET AUTO_INCREMENT_INCREMENT = 1;
--echo # MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
SET GLOBAL innodb_flush_sync=OFF;
--echo # For the server to hang, we must have pages for temporary tables
--echo # (and this is only effective as long as MDEV-12227 is not fixed).
--echo # (and the bug depended on MDEV-12227 not being fixed).
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
SET debug_dbug= '+d,ib_log_flush_ahead';

Expand Down
15 changes: 12 additions & 3 deletions storage/innobase/buf/buf0buf.cc
Expand Up @@ -207,7 +207,7 @@ the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.
The chain of modified blocks (buf_pool.flush_list) contains the blocks
holding file pages that have been modified in the memory
holding persistent file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
The access to this list is protected by buf_pool.flush_list_mutex.
Expand Down Expand Up @@ -1346,6 +1346,12 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const
break;
}

if (fsp_is_system_temporary(block->page.id().space()))
{
ut_ad(block->page.oldest_modification() <= 1);
break;
}

if (!block->page.ready_for_replace())
return block;

Expand Down Expand Up @@ -1500,8 +1506,10 @@ void buf_pool_t::close()
/* The buffer pool must be clean during normal shutdown.
Only on aborted startup (with recovery) or with innodb_fast_shutdown=2
we may discard changes. */
ut_ad(!bpage->oldest_modification() || srv_is_being_started ||
srv_fast_shutdown == 2);
ut_d(const lsn_t oldest= bpage->oldest_modification();)
ut_ad(!oldest || srv_is_being_started ||
srv_fast_shutdown == 2 ||
(oldest == 1 && fsp_is_system_temporary(bpage->id().space())));

if (bpage->state() != BUF_BLOCK_FILE_PAGE)
buf_page_free_descriptor(bpage);
Expand Down Expand Up @@ -4349,6 +4357,7 @@ void buf_pool_t::validate()
for (buf_page_t* b = UT_LIST_GET_FIRST(flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->oldest_modification());
ut_ad(!fsp_is_system_temporary(b->id().space()));
n_flushing++;

switch (b->state()) {
Expand Down
67 changes: 40 additions & 27 deletions storage/innobase/buf/buf0flu.cc
Expand Up @@ -148,6 +148,7 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
mysql_mutex_assert_not_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&log_sys.flush_order_mutex);
ut_ad(lsn);
ut_ad(!fsp_is_system_temporary(block->page.id().space()));

mysql_mutex_lock(&buf_pool.flush_list_mutex);
block->page.set_oldest_modification(lsn);
Expand All @@ -163,24 +164,27 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}

/** Remove a block from buf_pool.flush_list and update the list statistics.
The caller must hold both buf_pool.mutex and buf_pool.flush_list_mutex,
and must already have reset the oldest_modification of the page to 0.
Pages of the temporary tablespace must not be passed to this function.
@param[in,out]	bpage	block to be removed from buf_pool.flush_list */
static void buf_flush_remove_low(buf_page_t *bpage)
{
/* Only pages of persistent tablespaces are expected here. */
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
mysql_mutex_assert_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
/* The caller is responsible for clearing oldest_modification first. */
ut_ad(!bpage->oldest_modification());
/* The hazard pointer is adjusted before the page is unlinked
from buf_pool.flush_list. */
buf_pool.flush_hp.adjust(bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
/* Keep the byte count of buf_pool.flush_list in sync with its contents. */
buf_pool.stat.flush_list_bytes -= bpage->physical_size();
#ifdef UNIV_DEBUG
/* Debug builds additionally run a flush list validation check. */
buf_flush_validate_skip();
#endif /* UNIV_DEBUG */
}

/** Remove a block from the flush list of modified blocks.
@param[in,out] bpage block to be removed from the flush list */
static void buf_flush_remove(buf_page_t *bpage)
{
mysql_mutex_assert_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);

/* Important that we adjust the hazard pointer before removing
the bpage from flush list. */
buf_pool.flush_hp.adjust(bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
bpage->clear_oldest_modification();

buf_pool.stat.flush_list_bytes -= bpage->physical_size();

#ifdef UNIV_DEBUG
buf_flush_validate_skip();
#endif /* UNIV_DEBUG */
bpage->clear_oldest_modification();
buf_flush_remove_low(bpage);
}

/** Remove all dirty pages belonging to a given tablespace when we are
Expand Down Expand Up @@ -280,6 +284,7 @@ buf_flush_relocate_on_flush_list(
buf_page_t* prev;

mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(!fsp_is_system_temporary(bpage->id().space()));

if (!bpage->oldest_modification()) {
return;
Expand Down Expand Up @@ -356,11 +361,19 @@ void buf_page_write_complete(const IORequest &request)
DBUG_PRINT("ib_buf", ("write page %u:%u",
bpage->id().space(), bpage->id().page_no()));
ut_ad(request.is_LRU() ? buf_pool.n_flush_LRU : buf_pool.n_flush_list);
const bool temp= fsp_is_system_temporary(bpage->id().space());

mysql_mutex_lock(&buf_pool.mutex);
bpage->set_io_fix(BUF_IO_NONE);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_remove(bpage);
ut_ad(!temp || bpage->oldest_modification() == 1);
bpage->clear_oldest_modification();

if (!temp)
buf_flush_remove_low(bpage);
else
ut_ad(request.is_LRU());

mysql_mutex_unlock(&buf_pool.flush_list_mutex);

if (dblwr)
Expand Down Expand Up @@ -787,8 +800,13 @@ static void buf_release_freed_page(buf_page_t *bpage)
mysql_mutex_lock(&buf_pool.mutex);
bpage->set_io_fix(BUF_IO_NONE);
bpage->status= buf_page_t::NORMAL;
const bool temp= fsp_is_system_temporary(bpage->id().space());
ut_ad(!temp || uncompressed);
ut_ad(!temp || bpage->oldest_modification() == 1);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_remove(bpage);
bpage->clear_oldest_modification();
if (!temp)
buf_flush_remove_low(bpage);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);

if (uncompressed)
Expand Down Expand Up @@ -1552,7 +1570,7 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn)
const bool running= n_flush != 0;
/* FIXME: we are performing a dirty read of buf_pool.flush_list.count
while not holding buf_pool.flush_list_mutex */
if (running || !UT_LIST_GET_LEN(buf_pool.flush_list))
if (running || (lsn && !UT_LIST_GET_LEN(buf_pool.flush_list)))
{
if (!running)
mysql_cond_broadcast(cond);
Expand Down Expand Up @@ -2098,7 +2116,6 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)

if (!dirty_blocks)
{
unemployed2:
if (UNIV_UNLIKELY(lsn_limit != 0))
{
buf_flush_sync_lsn= 0;
Expand All @@ -2119,14 +2136,9 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit)
goto unemployed;

const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);

#if 0 /* MDEV-12227 FIXME: enable this */
ut_ad(oldest_lsn); /* dirty_blocks implies this */
#else
if (!oldest_lsn)
goto unemployed2;
#endif
const lsn_t oldest_lsn= buf_pool.get_oldest_modified()
->oldest_modification();
ut_ad(oldest_lsn);

if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
buf_flush_sync_lsn= 0;
Expand Down Expand Up @@ -2307,7 +2319,8 @@ void buf_flush_sync()
struct Check {
void operator()(const buf_page_t* elem) const
{
ut_a(elem->oldest_modification());
ut_ad(elem->oldest_modification());
ut_ad(!fsp_is_system_temporary(elem->id().space()));
}
};

Expand Down
29 changes: 21 additions & 8 deletions storage/innobase/include/buf0buf.h
Expand Up @@ -940,6 +940,15 @@ class buf_page_t
/** Clear oldest_modification when removing from buf_pool.flush_list */
inline void clear_oldest_modification();

/** Notify that a page in a temporary tablespace has been modified.
The page is marked dirty by setting oldest_modification_ to the
constant 1 instead of a log sequence number. This function does not
touch buf_pool.flush_list.
The page must belong to the temporary tablespace, be in the state
BUF_BLOCK_FILE_PAGE, and not be marked as modified yet. */
void set_temp_modified()
{
ut_ad(fsp_is_system_temporary(id().space()));
ut_ad(state() == BUF_BLOCK_FILE_PAGE);
/* Must only be called for the first modification of a clean page. */
ut_ad(!oldest_modification());
/* 1 is the "dirty" marker used for pages of the temporary tablespace. */
oldest_modification_= 1;
}

/** Prepare to release a file page to buf_pool.free. */
void free_file_page()
{
Expand Down Expand Up @@ -1552,18 +1561,22 @@ class buf_pool_t
bool is_block_lock(const rw_lock_t *l) const
{ return is_block_field(static_cast<const void*>(l)); }

/** Get the last element of flush_list.
The caller must hold flush_list_mutex.
@return the block that was made dirty the longest time ago
@retval nullptr if flush_list is empty (NOTE: assumes that
UT_LIST_GET_LAST() returns a null pointer for an empty list) */
const buf_page_t *get_oldest_modified() const
{
mysql_mutex_assert_owner(&flush_list_mutex);
const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
/* flush_list must not contain pages of the temporary tablespace. */
ut_ad(!bpage || !fsp_is_system_temporary(bpage->id().space()));
/* Every block in flush_list must have a nonzero oldest_modification. */
ut_ad(!bpage || bpage->oldest_modification());
return bpage;
}

/**
@return the smallest oldest_modification lsn for any page
@retval empty_lsn if all modified persistent pages have been flushed */
lsn_t get_oldest_modification(lsn_t empty_lsn)
lsn_t get_oldest_modification(lsn_t empty_lsn) const
{
mysql_mutex_assert_owner(&flush_list_mutex);
const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
#if 1 /* MDEV-12227 FIXME: remove this loop */
for (; bpage && fsp_is_system_temporary(bpage->id().space());
bpage= UT_LIST_GET_PREV(list, bpage))
ut_ad(bpage->oldest_modification());
#endif
const buf_page_t *bpage= get_oldest_modified();
return bpage ? bpage->oldest_modification() : empty_lsn;
}

Expand Down
6 changes: 4 additions & 2 deletions storage/innobase/include/buf0flu.ic
Expand Up @@ -65,10 +65,12 @@ buf_flush_note_modification(

const lsn_t oldest_modification = block->page.oldest_modification();

if (!oldest_modification) {
if (oldest_modification) {
ut_ad(oldest_modification <= start_lsn);
} else if (!fsp_is_system_temporary(block->page.id().space())) {
buf_flush_insert_into_flush_list(block, start_lsn);
} else {
ut_ad(oldest_modification <= start_lsn);
block->page.set_temp_modified();
}

srv_stats.buf_pool_write_requests.inc();
Expand Down

0 comments on commit 5eb5395

Please sign in to comment.