Skip to content

Commit

Permalink
MDEV-32050: Do not copy undo records in purge
Browse files Browse the repository at this point in the history
Also, default to innodb_purge_batch_size=1000,
replacing the old default value of processing 300 undo log pages
in a batch. Axel Schwenke found this value to help reduce purge lag
without having a significant impact on workload throughput.

In purge, we can simply acquire a shared latch on the undo log page
(to avoid a race condition like the one that was fixed in
commit b102872) and retain a buffer-fix
after releasing the latch. The buffer-fix will prevent the undo log
page from being evicted from the buffer pool. Concurrent modification
is prevented by design. Only the purge_coordinator_task
(or its accomplice purge_truncation_task) may free the undo log pages,
after any purge_worker_task have completed execution. Hence, we do not
have to worry about any overwriting or reuse of the undo log records.

trx_undo_rec_copy(): Remove. The only remaining caller would have been
trx_undo_get_undo_rec_low(), which is where the logic was merged.

purge_sys_t::m_initialized: Replaces heap.

purge_sys_t::pages: A cache of buffer-fixed pages that have been
looked up from buf_pool.page_hash.

purge_sys_t::get_page(): Return a buffer-fixed undo page, using the
pages cache.

purge_sys_t::batch_cleanup(): Renamed from clone_end_view().
Clear the pages cache and clone the end_view at the end of a batch.

purge_sys_t::n_pages_handled(): Return pages.size(). This determines
if innodb_purge_batch_size was exceeded.

purge_sys_t::rseg_get_next_history_log(): Replaces
trx_purge_rseg_get_next_history_log().

purge_sys_t::choose_next_log(): Replaces trx_purge_choose_next_log()
and trx_purge_read_undo_rec().

purge_sys_t::get_next_rec(): Replaces trx_purge_get_next_rec()
and trx_undo_get_next_rec().

purge_sys_t::fetch_next_rec(): Replaces trx_purge_fetch_next_rec()
and some use of trx_undo_get_first_rec().

trx_purge_attach_undo_recs(): Do not allow purge_sys.n_pages_handled() to
exceed the innodb_purge_batch_size or ¾ of the buffer pool, whichever
is smaller.

Reviewed by: Vladislav Lesin
Tested by: Matthias Leich and Axel Schwenke
  • Loading branch information
dr-m committed Oct 25, 2023
1 parent 8873328 commit aa719b5
Show file tree
Hide file tree
Showing 12 changed files with 314 additions and 336 deletions.
@@ -1,19 +1,19 @@
SET @global_start_value = @@global.innodb_purge_batch_size;
SELECT @global_start_value;
@global_start_value
300
1000
'#--------------------FN_DYNVARS_046_01------------------------#'
SET @@global.innodb_purge_batch_size = 1;
SET @@global.innodb_purge_batch_size = DEFAULT;
SELECT @@global.innodb_purge_batch_size;
@@global.innodb_purge_batch_size
300
1000
'#---------------------FN_DYNVARS_046_02-------------------------#'
SET innodb_purge_batch_size = 1;
ERROR HY000: Variable 'innodb_purge_batch_size' is a GLOBAL variable and should be set with SET GLOBAL
SELECT @@innodb_purge_batch_size;
@@innodb_purge_batch_size
300
1000
SELECT local.innodb_purge_batch_size;
ERROR 42S02: Unknown table 'local' in field list
SET global innodb_purge_batch_size = 1;
Expand Down Expand Up @@ -112,4 +112,4 @@ SELECT @@global.innodb_purge_batch_size;
SET @@global.innodb_purge_batch_size = @global_start_value;
SELECT @@global.innodb_purge_batch_size;
@@global.innodb_purge_batch_size
300
1000
2 changes: 1 addition & 1 deletion mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff
Expand Up @@ -307,7 +307,7 @@
NUMERIC_MAX_VALUE 65536
@@ -1345,7 +1345,7 @@
SESSION_VALUE NULL
DEFAULT_VALUE 300
DEFAULT_VALUE 1000
VARIABLE_SCOPE GLOBAL
-VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_TYPE INT UNSIGNED
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/suite/sys_vars/r/sysvars_innodb.result
Expand Up @@ -1293,7 +1293,7 @@ READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_PURGE_BATCH_SIZE
SESSION_VALUE NULL
DEFAULT_VALUE 300
DEFAULT_VALUE 1000
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list.
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/handler/ha_innodb.cc
Expand Up @@ -18852,7 +18852,7 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
PLUGIN_VAR_OPCMDARG,
"Number of UNDO log pages to purge in one batch from the history list.",
NULL, NULL,
300, /* Default setting */
1000, /* Default setting */
1, /* Minimum value */
5000, 0); /* Maximum value */

Expand Down
9 changes: 0 additions & 9 deletions storage/innobase/include/row0purge.h
Expand Up @@ -80,15 +80,6 @@ row_purge_step(
que_thr_t* thr) /*!< in: query thread */
MY_ATTRIBUTE((nonnull, warn_unused_result));

/** Info required to purge a record: an undo log record together with
its roll pointer */
struct trx_purge_rec_t
{
/** Undo log record to purge */
const trx_undo_rec_t *undo_rec;
/** File pointer (roll pointer) to undo_rec */
roll_ptr_t roll_ptr;
};

/** Purge worker context */
struct purge_node_t
{
Expand Down
78 changes: 63 additions & 15 deletions storage/innobase/include/trx0purge.h
Expand Up @@ -31,6 +31,7 @@ Created 3/26/1996 Heikki Tuuri
#include "srw_lock.h"

#include <queue>
#include <unordered_map>

/** Prepend the history list with an undo log.
Remove the undo log segment from the rseg slot if it is too big for reuse.
Expand Down Expand Up @@ -127,13 +128,16 @@ struct TrxUndoRsegsIterator {
/** The control structure used in the purge operation */
class purge_sys_t
{
friend TrxUndoRsegsIterator;
public:
/** latch protecting view, m_enabled */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) mutable srw_spin_lock latch;
private:
/** Read view at the start of a purge batch. Any encountered index records
that are older than view will be removed. */
ReadViewBase view;
/** whether the subsystem has been initialized */
bool m_initialized{false};
/** whether purge is enabled; protected by latch and std::atomic */
std::atomic<bool> m_enabled{false};
public:
Expand All @@ -152,7 +156,34 @@ class purge_sys_t
/** Read view at the end of a purge batch (copied from view). Any undo pages
containing records older than end_view may be freed. */
ReadViewBase end_view;

/** Hash functor for page identifiers; it is the hash function of the
unordered_map type that is declared right after it. */
struct hasher
{
  /** @param id page identifier
  @return the raw value of id, converted to size_t */
  size_t operator()(const page_id_t &id) const
  {
    const auto raw_id= id.raw();
    return size_t(raw_id);
  }
};

/** Map from page identifier to buffer block, hashed by hasher.
ut_allocator is used as the allocator, except when compiling with
GCC 4.8 or a later 4.x release, where the allocator argument is omitted
and the default allocator of std::unordered_map applies. */
using unordered_map =
std::unordered_map<const page_id_t, buf_block_t*, hasher,
#if defined __GNUC__ && __GNUC__ == 4 && __GNUC_MINOR__ >= 8
std::equal_to<page_id_t>
/* GCC 4.8.5 would fail to find a matching allocator */
#else
std::equal_to<page_id_t>,
ut_allocator<std::pair<const page_id_t, buf_block_t*>>
#endif
>;
/** map of buffer-fixed undo log pages processed during a purge batch */
unordered_map pages;
public:
/** @return the number of processed undo pages */
size_t n_pages_handled() const { return pages.size(); }

/** Look up an undo log page.
@param id undo page identifier
@return undo page
@retval nullptr in case the page is corrupted */
buf_block_t *get_page(page_id_t id);

que_t* query; /*!< The query graph which will do the
parallelized purge operation */

Expand Down Expand Up @@ -188,6 +219,7 @@ class purge_sys_t
to purge */
trx_rseg_t* rseg; /*!< Rollback segment for the next undo
record to purge */
private:
uint32_t page_no; /*!< Page number for the next undo
record to purge, page number of the
log header, if dummy record */
Expand All @@ -202,7 +234,7 @@ class purge_sys_t
TrxUndoRsegsIterator
rseg_iter; /*!< Iterator to get the next rseg
to process */

public:
purge_pq_t purge_queue; /*!< Binary min-heap, ordered on
TrxUndoRsegs::trx_no. It is protected
by the pq_mutex */
Expand All @@ -217,17 +249,6 @@ class purge_sys_t
fil_space_t* last;
} truncate;

/** Heap for reading the undo log records */
mem_heap_t* heap;
/**
Constructor.
Some members may require late initialisation, thus we just mark object as
uninitialised. Real initialisation happens in create().
*/

purge_sys_t(): m_enabled(false), heap(nullptr) {}

/** Create the instance */
void create();

Expand Down Expand Up @@ -281,6 +302,32 @@ class purge_sys_t
/** @return whether stop_SYS() is in effect */
bool must_wait_FTS() const { return m_FTS_paused; }

private:
/**
Get the next record to purge and update the info in the purge system.
@param roll_ptr undo log pointer to the record
@return buffer-fixed reference to undo log record
@retval {nullptr,1} if the whole undo log can be skipped in purge
@retval {nullptr,0} if nothing is left, or on corruption */
inline trx_purge_rec_t get_next_rec(roll_ptr_t roll_ptr);

/** Choose the next undo log to purge.
@return whether anything is to be purged */
bool choose_next_log();

/** Update the last not yet purged history log info in rseg when
we have purged a whole undo log. Advances also purge_trx_no
past the purged log. */
void rseg_get_next_history_log();

public:
/**
Fetch the next undo log record from the history list to purge.
@return buffer-fixed reference to undo log record
@retval {nullptr,1} if the whole undo log can be skipped in purge
@retval {nullptr,0} if nothing is left, or on corruption */
inline trx_purge_rec_t fetch_next_rec();

/** Determine if the history of a transaction is purgeable.
@param trx_id transaction identifier
@return whether the history is purgeable */
Expand Down Expand Up @@ -327,9 +374,10 @@ class purge_sys_t
/** Wake up the purge threads if there is work to do. */
void wake_if_not_active();

/** Update end_view at the end of a purge batch.
@param head the new head of the purge queue */
inline void clone_end_view(const iterator &head);
/** Release undo pages and update end_view at the end of a purge batch.
@param head the new head of the purge queue */
inline void batch_cleanup(const iterator &head);

struct view_guard
{
Expand Down
23 changes: 0 additions & 23 deletions storage/innobase/include/trx0rec.h
Expand Up @@ -28,32 +28,9 @@ Created 3/26/1996 Heikki Tuuri

#include "trx0types.h"
#include "row0types.h"
#include "mtr0mtr.h"
#include "rem0types.h"
#include "page0types.h"
#include "row0log.h"
#include "que0types.h"

/** Duplicate an undo log record into a memory heap.
The 2-byte value at the start of the record is interpreted as the page
offset at which the record ends. In the copy, that value is overwritten
with the length of the record.
@param undo_rec record in an undo log page
@param heap memory heap
@return copy of undo_rec
@retval nullptr if the undo log record is corrupted */
inline trx_undo_rec_t* trx_undo_rec_copy(const trx_undo_rec_t *undo_rec,
                                         mem_heap_t *heap)
{
  const size_t rec_start= ut_align_offset(undo_rec, srv_page_size);
  const size_t rec_end= mach_read_from_2(undo_rec);

  /* The record must end after its own start and before the page trailer. */
  const bool valid= rec_end > rec_start &&
    rec_end < srv_page_size - FIL_PAGE_DATA_END;
  if (!valid)
    return nullptr;

  const size_t rec_len= rec_end - rec_start;
  trx_undo_rec_t *copy= static_cast<trx_undo_rec_t*>
    (mem_heap_dup(heap, undo_rec, rec_len));
  mach_write_to_2(copy, rec_len);
  return copy;
}

/**********************************************************************//**
Reads the undo log record number.
@return undo no */
Expand Down
9 changes: 9 additions & 0 deletions storage/innobase/include/trx0types.h
Expand Up @@ -107,6 +107,15 @@ typedef byte trx_undo_rec_t;

/* @} */

/** Info required to purge a record.
This is the return type of purge_sys_t::get_next_rec() and
purge_sys_t::fetch_next_rec(), which document two special values:
{nullptr,1} if the whole undo log can be skipped in purge, and
{nullptr,0} if nothing is left, or on corruption. */
struct trx_purge_rec_t
{
/** Undo log record, or nullptr (roll_ptr!=0 if the log can be skipped) */
const trx_undo_rec_t *undo_rec;
/** File pointer to undo_rec */
roll_ptr_t roll_ptr;
};

typedef std::vector<trx_id_t, ut_allocator<trx_id_t> > trx_ids_t;

/** Number of std::unordered_map hash buckets expected to be needed
Expand Down
31 changes: 8 additions & 23 deletions storage/innobase/include/trx0undo.h
Expand Up @@ -116,31 +116,16 @@ trx_undo_page_get_next_rec(const buf_block_t *undo_page, uint16_t rec,
trx_undo_rec_t*
trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no,
uint16_t offset, bool shared, mtr_t *mtr);
/** Get the next record in an undo log.
@param[in,out] block undo log page
@param[in] rec undo record offset in the page
@param[in] page_no undo log header page number
@param[in] offset undo log header offset on page
@param[in,out] mtr mini-transaction
@return undo log record, the page latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_next_rec(const buf_block_t *&block, uint16_t rec,
uint32_t page_no, uint16_t offset, mtr_t *mtr);

/** Get the first record in an undo log.
@param[in] space undo log header space
@param[in] page_no undo log header page number
@param[in] offset undo log header offset on page
@param[in] mode latching mode: RW_S_LATCH or RW_X_LATCH
@param[out] block undo log page
@param[in,out] mtr mini-transaction
@param[out] err error code
@return undo log record, the page latched
@retval nullptr if none */
/** Get the first undo log record on a page.
@param[in] block undo log page
@param[in] page_no undo log header page number
@param[in] offset undo log header page offset
@return pointer to first record
@retval nullptr if none exists */
trx_undo_rec_t*
trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
uint16_t offset, ulint mode, const buf_block_t*& block,
mtr_t *mtr, dberr_t *err);
trx_undo_page_get_first_rec(const buf_block_t *block, uint32_t page_no,
uint16_t offset);

/** Initialize an undo log page.
NOTE: This corresponds to a redo log record and must not be changed!
Expand Down

0 comments on commit aa719b5

Please sign in to comment.