Skip to content

Commit ea42c4b

Browse files
committed
MDEV-32050 preparation: Simplify ROLLBACK
undo_node_t::state: Replaced with bool is_temp.

row_undo_rec_get(): Do not copy the undo log record. The copying was motivated by the wish to not hold latches on the undo pages, and thereby to avoid deadlocks due to lock order inversion (a.k.a. latching order violation): it is not allowed to wait for an index page latch while holding an undo page latch, because MVCC reads would first acquire an index page latch and then an undo page latch. But in rollback, we do not actually need any latch on our own undo pages. The transaction that is being rolled back is the exclusive owner of its undo log records; they cannot be overwritten by other threads until the rollback is complete. Therefore, a buffer-fix will protect the undo log record just fine, by preventing page eviction. We still must initially acquire a shared latch on each undo page, to avoid a race condition like the one that was fixed in commit b102872.

row_undo_ins_parse_undo_rec(): The first two bytes of the undo log record are now the pointer to the next record within the page, not a length.

Reviewed by: Vladislav Lesin
1 parent b78b77e commit ea42c4b

File tree

4 files changed

+29
-61
lines changed

4 files changed

+29
-61
lines changed

storage/innobase/include/row0undo.h

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,10 @@ just in the case where the transaction modified the same record several times
7878
and another thread is currently doing the undo for successive versions of
7979
that index record. */
8080

81-
/** Execution state of an undo node */
82-
enum undo_exec {
83-
UNDO_NODE_FETCH_NEXT = 1, /*!< we should fetch the next
84-
undo log record */
85-
/** rollback an insert into persistent table */
86-
UNDO_INSERT_PERSISTENT,
87-
/** rollback an update (or delete) in a persistent table */
88-
UNDO_UPDATE_PERSISTENT,
89-
/** rollback an insert into temporary table */
90-
UNDO_INSERT_TEMPORARY,
91-
/** rollback an update (or delete) in a temporary table */
92-
UNDO_UPDATE_TEMPORARY,
93-
};
94-
9581
/** Undo node structure */
9682
struct undo_node_t{
9783
que_common_t common; /*!< node type: QUE_NODE_UNDO */
98-
undo_exec state; /*!< rollback execution state */
84+
bool is_temp;/*!< whether this is a temporary table */
9985
trx_t* trx; /*!< trx for which undo is done */
10086
roll_ptr_t roll_ptr;/*!< roll pointer to undo log record */
10187
trx_undo_rec_t* undo_rec;/*!< undo log record */

storage/innobase/row/row0uins.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -389,16 +389,14 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
389389
ulint dummy;
390390
bool dummy_extern;
391391

392-
ut_ad(node->state == UNDO_INSERT_PERSISTENT
393-
|| node->state == UNDO_INSERT_TEMPORARY);
394392
ut_ad(node->trx->in_rollback);
395393
ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr));
396394

397395
ptr = trx_undo_rec_get_pars(node->undo_rec, &node->rec_type, &dummy,
398396
&dummy_extern, &undo_no, &table_id);
399397

400398
node->update = NULL;
401-
if (node->state == UNDO_INSERT_PERSISTENT) {
399+
if (!node->is_temp) {
402400
node->table = dict_table_open_on_id(table_id, dict_locked,
403401
DICT_TABLE_OP_NORMAL);
404402
} else if (!dict_locked) {
@@ -428,7 +426,7 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked)
428426
|| dict_table_is_file_per_table(table)
429427
== !is_system_tablespace(table->space_id));
430428
size_t len = mach_read_from_2(node->undo_rec)
431-
+ size_t(node->undo_rec - ptr) - 2;
429+
- page_offset(ptr) - 2;
432430
const span<const char> name(reinterpret_cast<const char*>(ptr),
433431
len);
434432
if (strlen(table->name.m_name) != len

storage/innobase/row/row0umod.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,8 +1085,6 @@ static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
10851085
ulint cmpl_info;
10861086
bool dummy_extern;
10871087

1088-
ut_ad(node->state == UNDO_UPDATE_PERSISTENT
1089-
|| node->state == UNDO_UPDATE_TEMPORARY);
10901088
ut_ad(node->trx->in_rollback);
10911089
ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
10921090

@@ -1095,7 +1093,7 @@ static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
10951093
&dummy_extern, &undo_no, &table_id);
10961094
node->rec_type = type;
10971095

1098-
if (node->state == UNDO_UPDATE_PERSISTENT) {
1096+
if (!node->is_temp) {
10991097
node->table = dict_table_open_on_id(table_id, dict_locked,
11001098
DICT_TABLE_OP_NORMAL);
11011099
} else if (!dict_locked) {

storage/innobase/row/row0undo.cc

Lines changed: 25 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ row_undo_node_create(
140140
undo->common.type = QUE_NODE_UNDO;
141141
undo->common.parent = parent;
142142

143-
undo->state = UNDO_NODE_FETCH_NEXT;
144143
undo->trx = trx;
145144

146145
btr_pcur_init(&(undo->pcur));
@@ -219,8 +218,7 @@ row_undo_search_clust_to_pcur(
219218
log, first mark them DATA_MISSING. So we will know if the
220219
value gets updated */
221220
if (node->table->n_v_cols
222-
&& (node->state == UNDO_UPDATE_PERSISTENT
223-
|| node->state == UNDO_UPDATE_TEMPORARY)
221+
&& !trx_undo_roll_ptr_is_insert(node->roll_ptr)
224222
&& !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
225223
for (ulint i = 0;
226224
i < dict_table_get_n_v_cols(node->table); i++) {
@@ -258,8 +256,9 @@ row_undo_search_clust_to_pcur(
258256

259257
/** Get the latest undo log record for rollback.
260258
@param[in,out] node rollback context
261-
@return whether an undo log record was fetched */
262-
static bool row_undo_rec_get(undo_node_t* node)
259+
@return undo block for the undo log record
260+
@retval nullptr if no undo log record was fetched */
261+
static buf_block_t* row_undo_rec_get(undo_node_t* node)
263262
{
264263
trx_t* trx = node->trx;
265264

@@ -272,7 +271,7 @@ static bool row_undo_rec_get(undo_node_t* node)
272271
trx_undo_t* update = trx->rsegs.m_redo.undo;
273272
trx_undo_t* temp = trx->rsegs.m_noredo.undo;
274273
const undo_no_t limit = trx->roll_limit;
275-
bool is_temp = false;
274+
node->is_temp = false;
276275

277276
ut_ad(!update || !temp || update->empty() || temp->empty()
278277
|| update->top_undo_no != temp->top_undo_no);
@@ -288,7 +287,7 @@ static bool row_undo_rec_get(undo_node_t* node)
288287
if (temp && !temp->empty() && temp->top_undo_no >= limit) {
289288
if (!undo || undo->top_undo_no < temp->top_undo_no) {
290289
undo = temp;
291-
is_temp = true;
290+
node->is_temp = true;
292291
}
293292
}
294293

@@ -299,14 +298,14 @@ static bool row_undo_rec_get(undo_node_t* node)
299298
later, we will default to a full ROLLBACK. */
300299
trx->roll_limit = 0;
301300
trx->in_rollback = false;
302-
return false;
301+
return nullptr;
303302
}
304303

305304
ut_ad(!undo->empty());
306305
ut_ad(limit <= undo->top_undo_no);
307306

308307
node->roll_ptr = trx_undo_build_roll_ptr(
309-
false, trx_sys.rseg_id(undo->rseg, !is_temp),
308+
false, trx_sys.rseg_id(undo->rseg, !node->is_temp),
310309
undo->top_page_no, undo->top_offset);
311310

312311
mtr_t mtr;
@@ -316,7 +315,7 @@ static bool row_undo_rec_get(undo_node_t* node)
316315
page_id_t(undo->rseg->space->id, undo->top_page_no),
317316
0, RW_S_LATCH, &mtr);
318317
if (!undo_page) {
319-
return false;
318+
return nullptr;
320319
}
321320

322321
uint16_t offset = undo->top_offset;
@@ -338,12 +337,17 @@ static bool row_undo_rec_get(undo_node_t* node)
338337
ut_ad(undo->empty());
339338
}
340339

341-
node->undo_rec = trx_undo_rec_copy(undo_page->page.frame + offset,
342-
node->heap);
340+
undo_page->fix();
343341
mtr.commit();
344342

345-
if (UNIV_UNLIKELY(!node->undo_rec)) {
346-
return false;
343+
node->undo_rec = undo_page->page.frame + offset;
344+
345+
const size_t end = mach_read_from_2(node->undo_rec);
346+
if (UNIV_UNLIKELY(end <= offset
347+
|| end >= srv_page_size - FIL_PAGE_DATA_END)) {
348+
undo_page->unfix();
349+
node->undo_rec = nullptr;
350+
return nullptr;
347351
}
348352

349353
switch (node->undo_rec[2] & (TRX_UNDO_CMPL_INFO_MULT - 1)) {
@@ -360,17 +364,11 @@ static bool row_undo_rec_get(undo_node_t* node)
360364
case TRX_UNDO_INSERT_REC:
361365
case TRX_UNDO_EMPTY:
362366
node->roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
363-
node->state = is_temp
364-
? UNDO_INSERT_TEMPORARY : UNDO_INSERT_PERSISTENT;
365-
break;
366-
default:
367-
node->state = is_temp
368-
? UNDO_UPDATE_TEMPORARY : UNDO_UPDATE_PERSISTENT;
369367
}
370368

371369
trx->undo_no = node->undo_no = trx_undo_rec_get_undo_no(
372370
node->undo_rec);
373-
return true;
371+
return undo_page;
374372
}
375373

376374
/***********************************************************//**
@@ -387,29 +385,17 @@ row_undo(
387385
{
388386
ut_ad(node->trx->in_rollback);
389387

390-
if (node->state == UNDO_NODE_FETCH_NEXT && !row_undo_rec_get(node)) {
388+
buf_block_t* undo_page = row_undo_rec_get(node);
389+
390+
if (!undo_page) {
391391
/* Rollback completed for this query thread */
392392
thr->run_node = que_node_get_parent(node);
393393
return DB_SUCCESS;
394394
}
395395

396-
dberr_t err;
397-
398-
switch (node->state) {
399-
case UNDO_INSERT_PERSISTENT:
400-
case UNDO_INSERT_TEMPORARY:
401-
err = row_undo_ins(node, thr);
402-
break;
403-
case UNDO_UPDATE_PERSISTENT:
404-
case UNDO_UPDATE_TEMPORARY:
405-
err = row_undo_mod(node, thr);
406-
break;
407-
default:
408-
ut_ad("wrong state" == 0);
409-
err = DB_CORRUPTION;
410-
}
411-
412-
node->state = UNDO_NODE_FETCH_NEXT;
396+
dberr_t err = trx_undo_roll_ptr_is_insert(node->roll_ptr)
397+
? row_undo_ins(node, thr) : row_undo_mod(node, thr);
398+
undo_page->unfix();
413399
btr_pcur_close(&(node->pcur));
414400

415401
mem_heap_empty(node->heap);

0 commit comments

Comments
 (0)