Skip to content

Commit

Permalink
MDEV-32042 Simplify buf_page_get_gen()
Browse files: browse the repository at this point in the history
buf_page_get_low(): Rename to buf_page_get_gen(), and assume that no
crash recovery is needed.

recv_sys_t::recover(): Replaces the old buf_page_get_gen(). Reads a page
while crash recovery is in progress.

trx_rseg_get_n_undo_tablespaces(), ibuf_upgrade_needed():
Invoke recv_sys.recover() instead of buf_page_get_gen().

dict_boot(): Invoke recv_sys.recover() instead of buf_page_get_gen().
Do not load the system tables.

srv_start(): Load the system tables and the undo logs only after all of the
redo log has been applied in recv_sys.apply(true), at which point we can
safely invoke the regular buf_page_get_gen().
  • Loading branch information
dr-m committed Dec 4, 2023
1 parent b42f318 commit 850d617
Show file tree
Hide file tree
Showing 10 changed files with 115 additions and 156 deletions.
6 changes: 3 additions & 3 deletions storage/innobase/btr/btr0btr.cc
Expand Up @@ -1114,9 +1114,9 @@ void btr_drop_temporary_table(const dict_table_t &table)
for (const dict_index_t *index= table.indexes.start; index;
index= dict_table_get_next_index(index))
{
if (buf_block_t *block= buf_page_get_low({SRV_TMP_SPACE_ID, index->page}, 0,
RW_X_LATCH, nullptr, BUF_GET, &mtr,
nullptr))
if (buf_block_t *block= buf_page_get_gen({SRV_TMP_SPACE_ID, index->page},
0, RW_X_LATCH, nullptr, BUF_GET,
&mtr, nullptr))
{
btr_free_but_not_root(block, MTR_LOG_NO_REDO);
mtr.set_log_mode(MTR_LOG_NO_REDO);
Expand Down
64 changes: 4 additions & 60 deletions storage/innobase/buf/buf0buf.cc
Expand Up @@ -2585,7 +2585,7 @@ or BUF_PEEK_IF_IN_POOL
@return pointer to the block or NULL */
TRANSACTIONAL_TARGET
buf_block_t*
buf_page_get_low(
buf_page_get_gen(
const page_id_t page_id,
ulint zip_size,
ulint rw_latch,
Expand Down Expand Up @@ -2778,7 +2778,7 @@ buf_page_get_low(
wait_for_unzip:
/* The page is being read or written, or
another thread is executing buf_zip_decompress()
in buf_page_get_low() on it. */
in buf_page_get_gen() on it. */
block->page.unfix();
std::this_thread::sleep_for(
std::chrono::microseconds(100));
Expand All @@ -2801,10 +2801,7 @@ buf_page_get_low(
ut_ad(&block->page == buf_pool.page_hash.get(page_id, chain));

/* Wait for any other threads to release their buffer-fix
on the compressed-only block descriptor.
FIXME: Never fix() before acquiring the lock.
Only in buf_page_get_gen(), buf_page_get_low(), buf_page_free()
we are violating that principle. */
on the compressed-only block descriptor. */
state = block->page.state();

switch (state) {
Expand All @@ -2830,7 +2827,7 @@ buf_page_get_low(
goto wait_for_unfix;
}

/* Ensure that another buf_page_get_low() will wait for
/* Ensure that another buf_page_get_gen() will wait for
new_block->page.lock.x_unlock(). */
block->page.set_state(buf_page_t::READ_FIX);

Expand Down Expand Up @@ -2954,59 +2951,6 @@ buf_page_get_low(
return block;
}

/** Get access to a database page. Buffered redo log may be applied.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL
@param[out] err DB_SUCCESS or error code
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_gen(
const page_id_t page_id,
ulint zip_size,
ulint rw_latch,
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err)
{
buf_block_t *block= recv_sys.recover(page_id);
if (UNIV_LIKELY(!block))
return buf_page_get_low(page_id, zip_size, rw_latch,
guess, mode, mtr, err);
else if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
{
corrupted:
if (err)
*err= DB_CORRUPTION;
return nullptr;
}
if (err)
*err= DB_SUCCESS;
/* Recovery is a special case; we fix() before acquiring lock. */
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a lock, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
if (s < buf_page_t::UNFIXED)
{
ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
mysql_mutex_lock(&buf_pool.mutex);
block->page.unfix();
buf_LRU_free_page(&block->page, true);
mysql_mutex_unlock(&buf_pool.mutex);
goto corrupted;
}

mtr->page_lock(block, rw_latch);
return block;
}

/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/buf/buf0lru.cc
Expand Up @@ -815,7 +815,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)

/* We must hold an exclusive hash_lock to prevent
bpage->can_relocate() from changing due to a concurrent
execution of buf_page_get_low(). */
execution of buf_page_get_gen(). */
buf_pool_t::hash_chain& chain= buf_pool.page_hash.cell_get(id.fold());
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
/* We cannot use transactional_lock_guard here,
Expand Down
18 changes: 2 additions & 16 deletions storage/innobase/dict/dict0boot.cc
Expand Up @@ -218,8 +218,7 @@ dberr_t dict_boot()
dict_sys.create();

dberr_t err;
const buf_block_t *d = buf_page_get_gen(hdr_page_id, 0, RW_S_LATCH,
nullptr, BUF_GET, &mtr, &err);
const buf_block_t *d = recv_sys.recover(hdr_page_id, &mtr ,&err);
if (!d) {
mtr.commit();
return err;
Expand Down Expand Up @@ -393,19 +392,6 @@ dberr_t dict_boot()
UT_BITS_IN_BYTES(unsigned(table->indexes.start->n_nullable)));

mtr.commit();

if (err == DB_SUCCESS) {
/* Load definitions of other indexes on system tables */

dict_load_sys_table(dict_sys.sys_tables);
dict_load_sys_table(dict_sys.sys_columns);
dict_load_sys_table(dict_sys.sys_indexes);
dict_load_sys_table(dict_sys.sys_fields);
dict_sys.unlock();
dict_sys.load_sys_tables();
} else {
dict_sys.unlock();
}

dict_sys.unlock();
return err;
}
6 changes: 2 additions & 4 deletions storage/innobase/ibuf/ibuf0ibuf.cc
Expand Up @@ -1012,8 +1012,7 @@ dberr_t ibuf_upgrade_needed()
mtr.start();
mtr.x_lock_space(fil_system.sys_space);
dberr_t err;
const buf_block_t *header_page=
buf_page_get_gen(ibuf_header, 0, RW_S_LATCH, nullptr, BUF_GET, &mtr, &err);
const buf_block_t *header_page= recv_sys.recover(ibuf_header, &mtr, &err);

if (!header_page)
{
Expand All @@ -1026,8 +1025,7 @@ dberr_t ibuf_upgrade_needed()
return err;
}

const buf_block_t *root= buf_page_get_gen(ibuf_root, 0, RW_S_LATCH, nullptr,
BUF_GET, &mtr, &err);
const buf_block_t *root= recv_sys.recover(ibuf_root, &mtr, &err);
if (!root)
goto err_exit;

Expand Down
26 changes: 2 additions & 24 deletions storage/innobase/include/buf0buf.h
Expand Up @@ -201,34 +201,12 @@ buf_page_get_gen(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err = NULL)
MY_ATTRIBUTE((nonnull(6)));

/** This is the low level function used to get access to a database page.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL if a
block with page_id is to be evicted
@param[out] err DB_SUCCESS or error code
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_low(
const page_id_t page_id,
ulint zip_size,
ulint rw_latch,
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err);
dberr_t* err = nullptr);

/** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED => LRU
(the other is buf_page_get_low()).
(the other is buf_page_get_gen()).
@param[in,out] space space object
@param[in] offset offset of the tablespace
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
Expand Down
21 changes: 7 additions & 14 deletions storage/innobase/include/log0recv.h
Expand Up @@ -280,12 +280,6 @@ struct recv_sys_t
@retval -1 if the page cannot be recovered due to corruption */
inline buf_block_t *recover_low(const map::iterator &p, mtr_t &mtr,
buf_block_t *b, lsn_t init_lsn);
/** Attempt to initialize a page based on redo log records.
@param page_id page identifier
@return the recovered block
@retval nullptr if the page cannot be initialized based on log records
@retval -1 if the page cannot be recovered due to corruption */
ATTRIBUTE_COLD buf_block_t *recover_low(const page_id_t page_id);

/** All found log files (multiple ones are possible if we are upgrading
from before MariaDB Server 10.5.1) */
Expand Down Expand Up @@ -430,15 +424,14 @@ struct recv_sys_t
/** @return whether log file corruption was found */
bool is_corrupt_log() const { return UNIV_UNLIKELY(found_corrupt_log); }

/** Attempt to initialize a page based on redo log records.
/** Read a page or recover it based on redo log records.
@param page_id page identifier
@return the recovered block
@retval nullptr if the page cannot be initialized based on log records
@retval -1 if the page cannot be recovered due to corruption */
buf_block_t *recover(const page_id_t page_id)
{
return UNIV_UNLIKELY(recovery_on) ? recover_low(page_id) : nullptr;
}
@param mtr mini-transaction
@param err error code
@return the requested block
@retval nullptr if the page cannot be accessed due to corruption */
ATTRIBUTE_COLD
buf_block_t *recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err);

/** Try to recover a tablespace that was not readable earlier
@param p iterator
Expand Down
71 changes: 54 additions & 17 deletions storage/innobase/log/log0recv.cc
Expand Up @@ -3649,33 +3649,70 @@ inline buf_block_t *recv_sys_t::recover_low(const map::iterator &p, mtr_t &mtr,
return block ? block : reinterpret_cast<buf_block_t*>(-1);
}

/** Attempt to initialize a page based on redo log records.
/** Read a page or recover it based on redo log records.
@param page_id page identifier
@return recovered block
@retval nullptr if the page cannot be initialized based on log records */
ATTRIBUTE_COLD buf_block_t *recv_sys_t::recover_low(const page_id_t page_id)
@param mtr mini-transaction
@param err error code
@return the requested block
@retval nullptr if the page cannot be accessed due to corruption */
ATTRIBUTE_COLD
buf_block_t *
recv_sys_t::recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err)
{
if (!recovery_on)
must_read:
return buf_page_get_gen(page_id, 0, RW_S_LATCH, nullptr, BUF_GET, mtr,
err);

mysql_mutex_lock(&mutex);
map::iterator p= pages.find(page_id);

if (p != pages.end() && !p->second.being_processed && p->second.skip_read)
if (p == pages.end() || p->second.being_processed || !p->second.skip_read)
{
p->second.being_processed= 1;
const lsn_t init_lsn{mlog_init.last(page_id)};
mysql_mutex_unlock(&mutex);
buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
mtr_t mtr;
buf_block_t *block= recover_low(p, mtr, free_block, init_lsn);
p->second.being_processed= -1;
ut_ad(!block || block == reinterpret_cast<buf_block_t*>(-1) ||
block == free_block);
if (UNIV_UNLIKELY(!block))
buf_pool.free_block(free_block);
return block;
goto must_read;
}

p->second.being_processed= 1;
const lsn_t init_lsn{mlog_init.last(page_id)};
mysql_mutex_unlock(&mutex);
return nullptr;
buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block;
{
mtr_t local_mtr;
block= recover_low(p, local_mtr, free_block, init_lsn);
}
p->second.being_processed= -1;
if (UNIV_UNLIKELY(!block))
{
buf_pool.free_block(free_block);
goto must_read;
}
else if (block == reinterpret_cast<buf_block_t*>(-1))
{
corrupted:
if (err)
*err= DB_CORRUPTION;
return nullptr;
}

ut_ad(block == free_block);
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a latch, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
if (s < buf_page_t::UNFIXED)
{
mysql_mutex_lock(&buf_pool.mutex);
block->page.unfix();
buf_LRU_free_page(&block->page, true);
mysql_mutex_unlock(&buf_pool.mutex);
goto corrupted;
}

mtr->page_lock(block, RW_S_LATCH);
return block;
}

inline fil_space_t *fil_system_t::find(const char *path) const
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/row/row0import.cc
Expand Up @@ -2074,7 +2074,7 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
/* If we already had an old page with matching number
in the buffer pool, evict it now, because
we no longer evict the pages on DISCARD TABLESPACE. */
buf_page_get_low(block->page.id(), get_zip_size(), RW_NO_LATCH,
buf_page_get_gen(block->page.id(), get_zip_size(), RW_NO_LATCH,
nullptr, BUF_PEEK_IF_IN_POOL,
nullptr, nullptr);

Expand Down

0 comments on commit 850d617

Please sign in to comment.