Skip to content

Commit

Permalink
MDEV-15090 Reduce the overhead of writing undo log records
Browse files Browse the repository at this point in the history
Inside InnoDB, each mini-transaction that generates any redo log records
will acquire log_sys->mutex during mtr_t::commit() in order to copy the
records into the global log_sys->buf for writing into the redo log file.

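For single-row transactions, this incurs quite a bit of overhead.
Previously, we would use two mini-transactions for writing an undo log
record: one for assigning the undo log, and another for writing the
record itself. Now a single mini-transaction is used for writing a
record into a freshly updated undo log page. (Only if the undo record
does not fit in that page will we have to commit and restart
the mini-transaction.)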

trx_undo_assign(): Assign undo log for a persistent transaction,
or return the already assigned one.

trx_undo_assign_low(): Assign undo log for an operation on a
persistent or temporary table.

trx_undo_create(), trx_undo_reuse_cached(): Remove redundant parameters.
Merge the logic from trx_undo_mark_as_dict_operation().
  • Loading branch information
dr-m committed Jan 29, 2018
1 parent 4981f95 commit 5d3c3b4
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 88 deletions.
15 changes: 14 additions & 1 deletion storage/innobase/include/trx0undo.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,18 +252,31 @@ ulint
trx_undo_lists_init(
/*================*/
trx_rseg_t* rseg); /*!< in: rollback segment memory object */
/** Assign an undo log for a persistent transaction.
A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in,out] mtr mini-transaction
@retval DB_SUCCESS on success
@retval DB_TOO_MANY_CONCURRENT_TRXS
@retval DB_OUT_OF_FILE_SPACE
@retval DB_READ_ONLY
@retval DB_OUT_OF_MEMORY */
dberr_t
trx_undo_assign(trx_t* trx, mtr_t* mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Assign an undo log for a transaction.
A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in] rseg rollback segment
@param[out] undo the undo log
@param[in,out] mtr mini-transaction
@retval DB_SUCCESS on success
@retval DB_TOO_MANY_CONCURRENT_TRXS
@retval DB_OUT_OF_FILE_SPACE
@retval DB_READ_ONLY
@retval DB_OUT_OF_MEMORY */
dberr_t
trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo)
trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo, mtr_t*mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
Expand Down
8 changes: 6 additions & 2 deletions storage/innobase/row/row0import.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3400,8 +3400,12 @@ row_import_for_mysql(
mutex_enter(&trx->undo_mutex);

/* TODO: Do not write any undo log for the IMPORT cleanup. */
err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg,
&trx->rsegs.m_redo.undo);
{
mtr_t mtr;
mtr.start();
err = trx_undo_assign(trx, &mtr);
mtr.commit();
}

mutex_exit(&trx->undo_mutex);

Expand Down
8 changes: 5 additions & 3 deletions storage/innobase/row/row0trunc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1819,10 +1819,12 @@ row_truncate_table_for_mysql(

/* Step-6: Truncate operation can be rolled back in case of error
till some point. Associate rollback segment to record undo log. */
if (!dict_table_is_temporary(table)) {
if (!table->is_temporary()) {
mutex_enter(&trx->undo_mutex);
err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg,
&trx->rsegs.m_redo.undo);
mtr_t mtr;
mtr.start();
err = trx_undo_assign(trx, &mtr);
mtr.commit();
mutex_exit(&trx->undo_mutex);

DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log",
Expand Down
40 changes: 15 additions & 25 deletions storage/innobase/trx/trx0rec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1901,17 +1901,12 @@ trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
ut_ad(trx->id);
ut_ad(!table->is_temporary());

trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
trx_undo_t** pundo = &trx->rsegs.m_redo.undo;
mtr_t mtr;
mtr.start();
mutex_enter(&trx->undo_mutex);
dberr_t err = *pundo
? DB_SUCCESS
: trx_undo_assign_undo(trx, rseg, pundo);
ut_ad((err == DB_SUCCESS) == (*pundo != NULL));
if (trx_undo_t* undo = *pundo) {
mtr_t mtr;
mtr.start(trx);

dberr_t err = trx_undo_assign(trx, &mtr);
ut_ad((err == DB_SUCCESS) == (trx->rsegs.m_redo.undo != NULL));
if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
buf_block_t* block = buf_page_get_gen(
page_id_t(undo->space, undo->last_page_no),
univ_page_size, RW_X_LATCH,
Expand All @@ -1934,12 +1929,13 @@ trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
undo->top_undo_no = trx->undo_no++;
undo->guess_block = block;

trx->undo_rseg_space = rseg->space;
trx->undo_rseg_space
= trx->rsegs.m_redo.rseg->space;
err = DB_SUCCESS;
break;
} else {
mtr.commit();
mtr.start(trx);
mtr.start();
block = trx_undo_add_page(trx, undo, &mtr);
if (!block) {
err = DB_OUT_OF_FILE_SPACE;
Expand Down Expand Up @@ -2006,7 +2002,7 @@ trx_undo_report_row_operation(
mtr.start();
trx_undo_t** pundo;
trx_rseg_t* rseg;
const bool is_temp = dict_table_is_temporary(index->table);
const bool is_temp = index->table->is_temporary();

if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
Expand All @@ -2021,9 +2017,9 @@ trx_undo_report_row_operation(
}

mutex_enter(&trx->undo_mutex);
dberr_t err = *pundo ? DB_SUCCESS : trx_undo_assign_undo(
trx, rseg, pundo);
trx_undo_t* undo = *pundo;
dberr_t err = *pundo
? DB_SUCCESS : trx_undo_assign_low(trx, rseg, pundo, &mtr);
trx_undo_t* undo = *pundo;

ut_ad((err == DB_SUCCESS) == (undo != NULL));
if (undo == NULL) {
Expand Down Expand Up @@ -2051,12 +2047,6 @@ trx_undo_report_row_operation(
cmpl_info, clust_entry, &mtr);

if (UNIV_UNLIKELY(offset == 0)) {
/* The record did not fit on the page. We erase the
end segment of the undo log page and write a log
record of it: this is to ensure that in the debug
version the replicate page constructed using the log
records stays identical to the original page */

if (!trx_undo_erase_page_end(undo_page)) {
/* The record did not fit on an empty
undo page. Discard the freshly allocated
Expand All @@ -2071,8 +2061,8 @@ trx_undo_report_row_operation(
first, because it may be holding lower-level
latches, such as SYNC_FSP and SYNC_FSP_PAGE. */

mtr_commit(&mtr);
mtr.start(trx);
mtr.commit();
mtr.start();
if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
}
Expand Down Expand Up @@ -2132,7 +2122,7 @@ trx_undo_report_row_operation(
/* We have to extend the undo log by one page */

ut_ad(++loop_count < 2);
mtr.start(trx);
mtr.start();

if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
Expand Down
138 changes: 81 additions & 57 deletions storage/innobase/trx/trx0undo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1299,9 +1299,6 @@ trx_undo_create(
/*============*/
trx_t* trx, /*!< in: transaction */
trx_rseg_t* rseg, /*!< in: rollback segment memory copy */
trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is created */
const XID* xid, /*!< in: X/Open transaction identification*/
trx_undo_t** undo, /*!< out: the new undo log object, undefined
* if did not succeed */
mtr_t* mtr) /*!< in: mtr */
Expand Down Expand Up @@ -1332,17 +1329,36 @@ trx_undo_create(

page_no = page_get_page_no(undo_page);

offset = trx_undo_header_create(undo_page, trx_id, mtr);
offset = trx_undo_header_create(undo_page, trx->id, mtr);

trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr);

*undo = trx_undo_mem_create(rseg, id, trx_id, xid, page_no, offset);
*undo = trx_undo_mem_create(rseg, id, trx->id, trx->xid,
page_no, offset);
if (*undo == NULL) {

err = DB_OUT_OF_MEMORY;
return DB_OUT_OF_MEMORY;
} else if (rseg != trx->rsegs.m_redo.rseg) {
return DB_SUCCESS;
}

return(err);
switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE:
break;
case TRX_DICT_OP_INDEX:
/* Do not discard the table on recovery. */
trx->table_id = 0;
/* fall through */
case TRX_DICT_OP_TABLE:
(*undo)->table_id = trx->table_id;
(*undo)->dict_operation = TRUE;
mlog_write_ulint(undo_page + offset + TRX_UNDO_DICT_TRANS,
TRUE, MLOG_1BYTE, mtr);
mlog_write_ull(undo_page + offset + TRX_UNDO_TABLE_ID,
trx->table_id, mtr);
}

return DB_SUCCESS;
}

/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
Expand All @@ -1356,9 +1372,6 @@ trx_undo_reuse_cached(
/*==================*/
trx_t* trx, /*!< in: transaction */
trx_rseg_t* rseg, /*!< in: rollback segment memory object */
trx_id_t trx_id, /*!< in: id of the trx for which the undo log
is used */
const XID* xid, /*!< in: X/Open XA transaction identification */
mtr_t* mtr) /*!< in: mtr */
{
trx_undo_t* undo;
Expand All @@ -1380,67 +1393,89 @@ trx_undo_reuse_cached(
undo_page = trx_undo_page_get(
page_id_t(undo->space, undo->hdr_page_no), mtr);

offset = trx_undo_header_create(undo_page, trx_id, mtr);
offset = trx_undo_header_create(undo_page, trx->id, mtr);

trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr);
trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);

return(undo);
}

/**********************************************************************//**
Marks an undo log header as a header of a data dictionary operation
transaction. */
static
void
trx_undo_mark_as_dict_operation(
/*============================*/
trx_t* trx, /*!< in: dict op transaction */
trx_undo_t* undo, /*!< in: assigned undo log */
mtr_t* mtr) /*!< in: mtr */
{
page_t* hdr_page;
trx_undo_mem_init_for_reuse(undo, trx->id, trx->xid, offset);

hdr_page = trx_undo_page_get(
page_id_t(undo->space, undo->hdr_page_no), mtr);
if (rseg != trx->rsegs.m_redo.rseg) {
return undo;
}

switch (trx_get_dict_operation(trx)) {
case TRX_DICT_OP_NONE:
ut_error;
return undo;
case TRX_DICT_OP_INDEX:
/* Do not discard the table on recovery. */
undo->table_id = 0;
break;
trx->table_id = 0;
/* fall through */
case TRX_DICT_OP_TABLE:
undo->table_id = trx->table_id;
break;
undo->dict_operation = TRUE;
mlog_write_ulint(undo_page + offset + TRX_UNDO_DICT_TRANS,
TRUE, MLOG_1BYTE, mtr);
mlog_write_ull(undo_page + offset + TRX_UNDO_TABLE_ID,
trx->table_id, mtr);
}

mlog_write_ulint(hdr_page + undo->hdr_offset
+ TRX_UNDO_DICT_TRANS,
TRUE, MLOG_1BYTE, mtr);
return(undo);
}

mlog_write_ull(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
undo->table_id, mtr);
/** Assign an undo log for a persistent transaction.
A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in,out] mtr mini-transaction
@retval DB_SUCCESS on success
@retval DB_TOO_MANY_CONCURRENT_TRXS
@retval DB_OUT_OF_FILE_SPACE
@retval DB_READ_ONLY
@retval DB_OUT_OF_MEMORY */
dberr_t
trx_undo_assign(trx_t* trx, mtr_t* mtr)
{
dberr_t err = DB_SUCCESS;

ut_ad(mutex_own(&trx->undo_mutex));
ut_ad(mtr->get_log_mode() == MTR_LOG_ALL);

undo->dict_operation = TRUE;
if (trx->rsegs.m_redo.undo) {
return DB_SUCCESS;
}

trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;

mutex_enter(&rseg->mutex);
if (!(trx->rsegs.m_redo.undo= trx_undo_reuse_cached(trx, rseg, mtr))) {
err = trx_undo_create(trx, rseg, &trx->rsegs.m_redo.undo, mtr);
if (err != DB_SUCCESS) {
goto func_exit;
}
}

UT_LIST_ADD_FIRST(rseg->undo_list, trx->rsegs.m_redo.undo);

func_exit:
mutex_exit(&rseg->mutex);

return err;
}

/** Assign an undo log for a transaction.
A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in] rseg rollback segment
@param[out] undo the undo log
@param[in,out] mtr mini-transaction
@retval DB_SUCCESS on success
@retval DB_TOO_MANY_CONCURRENT_TRXS
@retval DB_OUT_OF_FILE_SPACE
@retval DB_READ_ONLY
@retval DB_OUT_OF_MEMORY */
dberr_t
trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo)
trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo, mtr_t*mtr)
{
const bool is_temp = rseg == trx->rsegs.m_noredo.rseg;
mtr_t mtr;
dberr_t err = DB_SUCCESS;

ut_ad(mutex_own(&trx->undo_mutex));
Expand All @@ -1449,12 +1484,9 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo)
ut_ad(undo == (is_temp
? &trx->rsegs.m_noredo.undo
: &trx->rsegs.m_redo.undo));

mtr.start(trx);

if (is_temp) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
}
ut_ad(!*undo);
ut_ad(mtr->get_log_mode()
== (is_temp ? MTR_LOG_NO_REDO : MTR_LOG_ALL));

mutex_enter(&rseg->mutex);

Expand All @@ -1464,25 +1496,17 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo)
goto func_exit;
);

*undo = trx_undo_reuse_cached(trx, rseg, trx->id, trx->xid, &mtr);
if (*undo == NULL) {
err = trx_undo_create(trx, rseg, trx->id, trx->xid,
undo, &mtr);
if (!(*undo= trx_undo_reuse_cached(trx, rseg, mtr))) {
err = trx_undo_create(trx, rseg, undo, mtr);
if (err != DB_SUCCESS) {
goto func_exit;
}
}

UT_LIST_ADD_FIRST(rseg->undo_list, *undo);

if (!is_temp && trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
trx_undo_mark_as_dict_operation(trx, *undo, &mtr);
}

func_exit:
mutex_exit(&rseg->mutex);
mtr.commit();

return(err);
}

Expand Down

0 comments on commit 5d3c3b4

Please sign in to comment.