Skip to content

Commit

Permalink
MDEV-32050 preparation: Simplify ROLLBACK
Browse files Browse the repository at this point in the history
undo_node_t::state: Replaced with bool is_temp.

row_undo_rec_get(): Do not copy the undo log record.
The motivation for the copying was to avoid holding latches on the undo pages
and therefore to avoid deadlocks due to lock order inversion a.k.a.
latching order violation: It is not allowed to wait for an index page latch
while holding an undo page latch, because MVCC reads would first acquire
an index page latch and then an undo page latch. During rollback, however, we
do not actually need any latch on our own undo pages. The transaction
that is being rolled back is the exclusive owner of its undo log records.
They cannot be overwritten by other threads until the rollback is complete.
Therefore, a buffer fix will protect the undo log record just fine,
by preventing page eviction. We still must initially acquire a shared latch
on each undo page, to avoid a race condition like the one that was fixed in
commit b102872.

row_undo_ins_parse_undo_rec(): The first two bytes of the undo log record
are now the page offset of the next record within the page, not a length.

Reviewed by: Vladislav Lesin
  • Loading branch information
dr-m committed Oct 25, 2023
1 parent b78b77e commit ea42c4b
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 61 deletions.
16 changes: 1 addition & 15 deletions storage/innobase/include/row0undo.h
Expand Up @@ -78,24 +78,10 @@ just in the case where the transaction modified the same record several times
and another thread is currently doing the undo for successive versions of
that index record. */

/** Execution state of an undo node: either the next undo log record
still has to be fetched, or a fetched record is to be rolled back, in
which case the state tells whether it is an insert or an update/delete
and whether the table is persistent or temporary. */
enum undo_exec {
UNDO_NODE_FETCH_NEXT = 1, /*!< the next undo log record
should be fetched */
/** roll back an insert into a persistent table */
UNDO_INSERT_PERSISTENT,
/** roll back an update (or delete) in a persistent table */
UNDO_UPDATE_PERSISTENT,
/** roll back an insert into a temporary table */
UNDO_INSERT_TEMPORARY,
/** roll back an update (or delete) in a temporary table */
UNDO_UPDATE_TEMPORARY,
};

/** Undo node structure */
struct undo_node_t{
que_common_t common; /*!< node type: QUE_NODE_UNDO */
undo_exec state; /*!< rollback execution state */
bool is_temp;/*!< whether this is a temporary table */
trx_t* trx; /*!< trx for which undo is done */
roll_ptr_t roll_ptr;/*!< roll pointer to undo log record */
trx_undo_rec_t* undo_rec;/*!< undo log record */
Expand Down
6 changes: 2 additions & 4 deletions storage/innobase/row/row0uins.cc
Expand Up @@ -389,16 +389,14 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
ulint dummy;
bool dummy_extern;

ut_ad(node->state == UNDO_INSERT_PERSISTENT
|| node->state == UNDO_INSERT_TEMPORARY);
ut_ad(node->trx->in_rollback);
ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr));

ptr = trx_undo_rec_get_pars(node->undo_rec, &node->rec_type, &dummy,
&dummy_extern, &undo_no, &table_id);

node->update = NULL;
if (node->state == UNDO_INSERT_PERSISTENT) {
if (!node->is_temp) {
node->table = dict_table_open_on_id(table_id, dict_locked,
DICT_TABLE_OP_NORMAL);
} else if (!dict_locked) {
Expand Down Expand Up @@ -428,7 +426,7 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
|| dict_table_is_file_per_table(table)
== !is_system_tablespace(table->space_id));
size_t len = mach_read_from_2(node->undo_rec)
+ size_t(node->undo_rec - ptr) - 2;
- page_offset(ptr) - 2;
const span<const char> name(reinterpret_cast<const char*>(ptr),
len);
if (strlen(table->name.m_name) != len
Expand Down
4 changes: 1 addition & 3 deletions storage/innobase/row/row0umod.cc
Expand Up @@ -1085,8 +1085,6 @@ static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
ulint cmpl_info;
bool dummy_extern;

ut_ad(node->state == UNDO_UPDATE_PERSISTENT
|| node->state == UNDO_UPDATE_TEMPORARY);
ut_ad(node->trx->in_rollback);
ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));

Expand All @@ -1095,7 +1093,7 @@ static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
&dummy_extern, &undo_no, &table_id);
node->rec_type = type;

if (node->state == UNDO_UPDATE_PERSISTENT) {
if (!node->is_temp) {
node->table = dict_table_open_on_id(table_id, dict_locked,
DICT_TABLE_OP_NORMAL);
} else if (!dict_locked) {
Expand Down
64 changes: 25 additions & 39 deletions storage/innobase/row/row0undo.cc
Expand Up @@ -140,7 +140,6 @@ row_undo_node_create(
undo->common.type = QUE_NODE_UNDO;
undo->common.parent = parent;

undo->state = UNDO_NODE_FETCH_NEXT;
undo->trx = trx;

btr_pcur_init(&(undo->pcur));
Expand Down Expand Up @@ -219,8 +218,7 @@ row_undo_search_clust_to_pcur(
log, first mark them DATA_MISSING. So we will know if the
value gets updated */
if (node->table->n_v_cols
&& (node->state == UNDO_UPDATE_PERSISTENT
|| node->state == UNDO_UPDATE_TEMPORARY)
&& !trx_undo_roll_ptr_is_insert(node->roll_ptr)
&& !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
for (ulint i = 0;
i < dict_table_get_n_v_cols(node->table); i++) {
Expand Down Expand Up @@ -258,8 +256,9 @@ row_undo_search_clust_to_pcur(

/** Get the latest undo log record for rollback.
@param[in,out] node rollback context
@return whether an undo log record was fetched */
static bool row_undo_rec_get(undo_node_t* node)
@return undo block for the undo log record
@retval nullptr if no undo log record was fetched */
static buf_block_t* row_undo_rec_get(undo_node_t* node)
{
trx_t* trx = node->trx;

Expand All @@ -272,7 +271,7 @@ static bool row_undo_rec_get(undo_node_t* node)
trx_undo_t* update = trx->rsegs.m_redo.undo;
trx_undo_t* temp = trx->rsegs.m_noredo.undo;
const undo_no_t limit = trx->roll_limit;
bool is_temp = false;
node->is_temp = false;

ut_ad(!update || !temp || update->empty() || temp->empty()
|| update->top_undo_no != temp->top_undo_no);
Expand All @@ -288,7 +287,7 @@ static bool row_undo_rec_get(undo_node_t* node)
if (temp && !temp->empty() && temp->top_undo_no >= limit) {
if (!undo || undo->top_undo_no < temp->top_undo_no) {
undo = temp;
is_temp = true;
node->is_temp = true;
}
}

Expand All @@ -299,14 +298,14 @@ static bool row_undo_rec_get(undo_node_t* node)
later, we will default to a full ROLLBACK. */
trx->roll_limit = 0;
trx->in_rollback = false;
return false;
return nullptr;
}

ut_ad(!undo->empty());
ut_ad(limit <= undo->top_undo_no);

node->roll_ptr = trx_undo_build_roll_ptr(
false, trx_sys.rseg_id(undo->rseg, !is_temp),
false, trx_sys.rseg_id(undo->rseg, !node->is_temp),
undo->top_page_no, undo->top_offset);

mtr_t mtr;
Expand All @@ -316,7 +315,7 @@ static bool row_undo_rec_get(undo_node_t* node)
page_id_t(undo->rseg->space->id, undo->top_page_no),
0, RW_S_LATCH, &mtr);
if (!undo_page) {
return false;
return nullptr;
}

uint16_t offset = undo->top_offset;
Expand All @@ -338,12 +337,17 @@ static bool row_undo_rec_get(undo_node_t* node)
ut_ad(undo->empty());
}

node->undo_rec = trx_undo_rec_copy(undo_page->page.frame + offset,
node->heap);
undo_page->fix();
mtr.commit();

if (UNIV_UNLIKELY(!node->undo_rec)) {
return false;
node->undo_rec = undo_page->page.frame + offset;

const size_t end = mach_read_from_2(node->undo_rec);
if (UNIV_UNLIKELY(end <= offset
|| end >= srv_page_size - FIL_PAGE_DATA_END)) {
undo_page->unfix();
node->undo_rec = nullptr;
return nullptr;
}

switch (node->undo_rec[2] & (TRX_UNDO_CMPL_INFO_MULT - 1)) {
Expand All @@ -360,17 +364,11 @@ static bool row_undo_rec_get(undo_node_t* node)
case TRX_UNDO_INSERT_REC:
case TRX_UNDO_EMPTY:
node->roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
node->state = is_temp
? UNDO_INSERT_TEMPORARY : UNDO_INSERT_PERSISTENT;
break;
default:
node->state = is_temp
? UNDO_UPDATE_TEMPORARY : UNDO_UPDATE_PERSISTENT;
}

trx->undo_no = node->undo_no = trx_undo_rec_get_undo_no(
node->undo_rec);
return true;
return undo_page;
}

/***********************************************************//**
Expand All @@ -387,29 +385,17 @@ row_undo(
{
ut_ad(node->trx->in_rollback);

if (node->state == UNDO_NODE_FETCH_NEXT && !row_undo_rec_get(node)) {
buf_block_t* undo_page = row_undo_rec_get(node);

if (!undo_page) {
/* Rollback completed for this query thread */
thr->run_node = que_node_get_parent(node);
return DB_SUCCESS;
}

dberr_t err;

switch (node->state) {
case UNDO_INSERT_PERSISTENT:
case UNDO_INSERT_TEMPORARY:
err = row_undo_ins(node, thr);
break;
case UNDO_UPDATE_PERSISTENT:
case UNDO_UPDATE_TEMPORARY:
err = row_undo_mod(node, thr);
break;
default:
ut_ad("wrong state" == 0);
err = DB_CORRUPTION;
}

node->state = UNDO_NODE_FETCH_NEXT;
dberr_t err = trx_undo_roll_ptr_is_insert(node->roll_ptr)
? row_undo_ins(node, thr) : row_undo_mod(node, thr);
undo_page->unfix();
btr_pcur_close(&(node->pcur));

mem_heap_empty(node->heap);
Expand Down

0 comments on commit ea42c4b

Please sign in to comment.