Skip to content

Commit 727b549

Browse files
dr-m authored and sanja-byelkin committed
MDEV-34212 InnoDB transaction recovery is incorrect
trx_undo_mem_create_at_db_start(): Invoke recv_sys_t::recover() instead of buf_page_get_gen(), so that all undo log pages will be recovered correctly. Failure to do this could prevent InnoDB from starting up due to "Data structure corruption", or it could potentially lead to a situation where InnoDB starts up but some transactions were recovered incorrectly.

recv_sys_t::recover(): Only acquire a buffer-fix on the pages, not a shared latch. This is adequate protection, because this function is only being invoked during early startup when no "users" are modifying buffer pool pages. The only writes are due to server bootstrap (the data files being created) or crash recovery (changes from ib_logfile0 being applied).

buf_page_get_gen(): Assert that the function is not invoked while crash recovery is in progress, and that the special mode BUF_GET_RECOVER is only invoked during crash recovery or server bootstrap.

All this should really have been part of commit 850d617 (MDEV-32042).
1 parent 6c0eb29 commit 727b549

File tree

4 files changed

+17
-7
lines changed

4 files changed

+17
-7
lines changed

storage/innobase/buf/buf0buf.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2587,6 +2587,15 @@ buf_page_get_gen(
25872587
{
25882588
ulint retries = 0;
25892589

2590+
/* BUF_GET_RECOVER is only used by recv_sys_t::recover(),
2591+
which must be invoked during early server startup when crash
2592+
recovery may be in progress. The only case when it may be
2593+
invoked outside recovery is when dict_create() has initialized
2594+
a new database and is invoking dict_boot(). In this case, the
2595+
LSN will be small. */
2596+
ut_ad(mode == BUF_GET_RECOVER
2597+
? recv_recovery_is_on() || log_sys.get_lsn() < 50000
2598+
: !recv_recovery_is_on() || recv_sys.after_apply);
25902599
ut_ad(!mtr || mtr->is_active());
25912600
ut_ad(mtr || mode == BUF_PEEK_IF_IN_POOL);
25922601
ut_ad((rw_latch == RW_S_LATCH)
@@ -2608,6 +2617,7 @@ buf_page_get_gen(
26082617
/* The caller may pass a dummy page size,
26092618
because it does not really matter. */
26102619
break;
2620+
case BUF_GET_RECOVER:
26112621
case BUF_GET:
26122622
ut_ad(!mtr->is_freeing_tree());
26132623
fil_space_t* s = fil_space_get(page_id.space());

storage/innobase/include/buf0buf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Created 11/5/1995 Heikki Tuuri
4545
/** @name Modes for buf_page_get_gen */
4646
/* @{ */
4747
#define BUF_GET 10 /*!< get always */
48+
#define BUF_GET_RECOVER 9 /*!< like BUF_GET, but in recv_sys.recover() */
4849
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
4950
#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
5051
the block young in the LRU list */

storage/innobase/log/log0recv.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3678,8 +3678,8 @@ recv_sys_t::recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err)
36783678
{
36793679
if (!recovery_on)
36803680
must_read:
3681-
return buf_page_get_gen(page_id, 0, RW_S_LATCH, nullptr, BUF_GET, mtr,
3682-
err);
3681+
return buf_page_get_gen(page_id, 0, RW_NO_LATCH, nullptr, BUF_GET_RECOVER,
3682+
mtr, err);
36833683

36843684
mysql_mutex_lock(&mutex);
36853685
map::iterator p= pages.find(page_id);
@@ -3728,7 +3728,7 @@ recv_sys_t::recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err)
37283728
goto corrupted;
37293729
}
37303730

3731-
mtr->page_lock(block, RW_S_LATCH);
3731+
mtr->page_lock(block, RW_NO_LATCH);
37323732
return block;
37333733
}
37343734

storage/innobase/trx/trx0undo.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -980,7 +980,7 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no)
980980

981981
mtr.start();
982982
const page_id_t page_id{rseg->space->id, page_no};
983-
const buf_block_t* block = buf_page_get(page_id, 0, RW_X_LATCH, &mtr);
983+
const buf_block_t* block = recv_sys.recover(page_id, &mtr, nullptr);
984984
if (UNIV_UNLIKELY(!block)) {
985985
corrupted:
986986
mtr.commit();
@@ -1094,9 +1094,8 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no)
10941094
undo->last_page_no = last_addr.page;
10951095
undo->top_page_no = last_addr.page;
10961096

1097-
const buf_block_t* last = buf_page_get(
1098-
page_id_t(rseg->space->id, undo->last_page_no), 0,
1099-
RW_X_LATCH, &mtr);
1097+
const buf_block_t* last = recv_sys.recover(
1098+
page_id_t(rseg->space->id, undo->last_page_no), &mtr, nullptr);
11001099

11011100
if (UNIV_UNLIKELY(!last)) {
11021101
goto corrupted_undo;

0 commit comments

Comments
 (0)