Skip to content

Commit 1738c0f

Browse files
committed
MDEV-22169 Recovery fails after failing to insert into mlog_init
In a multi-batch recovery, we must ensure that INIT_PAGE and especially the MDEV-15528 FREE_PAGE records will be taken properly into account. Writing a FREE_PAGE record gives the server permission to omit a page write. If recovery insists on applying log to a page whose page flush has been omitted, then the consistency checks in the application of high-level redo log records (appending an undo log record, inserting or deleting an index record) will likely fail. mlog_init_t::add(): Return whether the state was changed. mlog_init_t::will_avoid_read(): Determine whether a page read will be avoided and whether older log records can be safely skipped. recv_sys_t::parse(): Even if store==STORE_NO, process the records INIT_PAGE and FREE_PAGE. While processing them, we can delete older redo log records for the page. If store!=STORE_NO, we can directly skip redo log records of other types if mlog_init indicates that the page will be freed or initialized at a later LSN. This fix was developed in cooperation with Thirunarayanan Balathandayuthapani.
1 parent d848fca commit 1738c0f

File tree

1 file changed

+40
-12
lines changed

1 file changed

+40
-12
lines changed

storage/innobase/log/log0recv.cc

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,11 @@ struct log_phys_t : public log_rec_t
107107

108108
/** @return start of the log records */
109109
byte *begin() { return reinterpret_cast<byte*>(&len + 1); }
110-
/** @return start of the log records */
111-
const byte *begin() const { return const_cast<log_phys_t*>(this)->begin(); }
112110
/** @return end of the log records */
113111
byte *end() { byte *e= begin() + len; ut_ad(!*e); return e; }
114112
public:
113+
/** @return start of the log records */
114+
const byte *begin() const { return const_cast<log_phys_t*>(this)->begin(); }
115115
/** @return end of the log records */
116116
const byte *end() const { return const_cast<log_phys_t*>(this)->end(); }
117117

@@ -598,17 +598,19 @@ class mlog_init_t
598598
public:
599599
/** Record that a page will be initialized by the redo log.
600600
@param[in] page_id page identifier
601-
@param[in] lsn log sequence number */
602-
void add(const page_id_t page_id, lsn_t lsn)
601+
@param[in] lsn log sequence number
602+
@return whether the state was changed */
603+
bool add(const page_id_t page_id, lsn_t lsn)
603604
{
604605
ut_ad(mutex_own(&recv_sys.mutex));
605606
const init init = { lsn, false };
606607
std::pair<map::iterator, bool> p = inits.insert(
607608
map::value_type(page_id, init));
608609
ut_ad(!p.first->second.created);
609-
if (!p.second && p.first->second.lsn < init.lsn) {
610-
p.first->second = init;
611-
}
610+
if (p.second) return true;
611+
if (p.first->second.lsn >= init.lsn) return false;
612+
p.first->second = init;
613+
return true;
612614
}
613615

614616
/** Get the last stored lsn of the page id and its respective
@@ -623,6 +625,17 @@ class mlog_init_t
623625
return inits.find(page_id)->second;
624626
}
625627

628+
/** Determine if a page will be initialized or freed after a time.
629+
@param page_id page identifier
630+
@param lsn log sequence number
631+
@return whether page_id will be freed or initialized after lsn */
632+
bool will_avoid_read(page_id_t page_id, lsn_t lsn) const
633+
{
634+
ut_ad(mutex_own(&recv_sys.mutex));
635+
auto i= inits.find(page_id);
636+
return i != inits.end() && i->second.lsn > lsn;
637+
}
638+
626639
/** At the end of each recovery batch, reset the 'created' flags. */
627640
void reset()
628641
{
@@ -1938,7 +1951,7 @@ bool recv_sys_t::parse(lsn_t checkpoint_lsn, store_t store, bool apply)
19381951

19391952
if (got_page_op)
19401953
{
1941-
ut_d(const page_id_t id(space_id, page_no));
1954+
const page_id_t id(space_id, page_no);
19421955
ut_d(if ((b & 0x70) == INIT_PAGE) freed.erase(id));
19431956
ut_ad(freed.find(id) == freed.end());
19441957
switch (b & 0x70) {
@@ -2059,16 +2072,31 @@ bool recv_sys_t::parse(lsn_t checkpoint_lsn, store_t store, bool apply)
20592072
ut_ad(modified.emplace(id).second || (b & 0x70) != INIT_PAGE);
20602073
}
20612074
#endif
2075+
const bool is_init= (b & 0x70) <= INIT_PAGE;
20622076
switch (store) {
2063-
case STORE_NO:
2064-
continue;
20652077
case STORE_IF_EXISTS:
20662078
if (!fil_space_get_size(space_id))
20672079
continue;
20682080
/* fall through */
20692081
case STORE_YES:
2070-
add(page_id_t(space_id, page_no), start_lsn, end_lsn, recs,
2071-
static_cast<size_t>(l + rlen - recs));
2082+
if (is_init || !mlog_init.will_avoid_read(id, start_lsn))
2083+
add(id, start_lsn, end_lsn, recs,
2084+
static_cast<size_t>(l + rlen - recs));
2085+
continue;
2086+
case STORE_NO:
2087+
if (!is_init)
2088+
continue;
2089+
map::iterator i= pages.find(id);
2090+
if (i == pages.end())
2091+
continue;
2092+
if ((*static_cast<const log_phys_t*>(*i->second.log.begin())->begin() &
2093+
0x70) <= INIT_PAGE)
2094+
{
2095+
ut_ad(i->second.state == page_recv_t::RECV_WILL_NOT_READ);
2096+
continue;
2097+
}
2098+
pages.erase(i);
2099+
mlog_init.add(id, start_lsn);
20722100
}
20732101
}
20742102
#if 1 /* MDEV-14425 FIXME: this must be in the checkpoint file only! */

0 commit comments

Comments
 (0)