MDEV-25062: Reduce trx_rseg_t::mutex contention
redo_rseg_mutex, noredo_rseg_mutex: Remove the PERFORMANCE_SCHEMA keys.
The rollback segment mutex will be uninstrumented.

trx_sys_t: Remove pointer indirection for rseg_array, temp_rseg.
Align each element to the cache line.

trx_sys_t::rseg_id(): Replaces trx_rseg_t::id.
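
With the rollback segment objects embedded directly in trx_sys (and each element cache-line aligned), the segment id no longer needs to be stored in every trx_rseg_t; it can be derived from the element's position in the array. A minimal sketch of that idea, assuming a fixed-size array and simplified names rather than the exact MariaDB declarations:

struct alignas(64) rseg_t
{
  // payload omitted; cache-line alignment avoids false sharing between segments
};

struct trx_sys_sketch
{
  static constexpr unsigned N_RSEGS= 128;   // assumed segment count
  rseg_t rseg_array[N_RSEGS];               // objects embedded, no pointer indirection
  rseg_t temp_rseg;                         // segment for temporary undo logs

  // The id is simply the index of the element within rseg_array.
  unsigned rseg_id(const rseg_t *rseg) const
  { return static_cast<unsigned>(rseg - rseg_array); }
};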

trx_rseg_t::ref: Replaces needs_purge, trx_ref_count, skip_allocation
in a single std::atomic<uint32_t>.
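
The packing keeps the two flags in the low-order bits and the reference count in the remaining bits, so the counter is stepped in units of 4 and a single atomic read-modify-write can update either part without a mutex. The real member functions are visible in the trx0rseg.h diff below; the following stand-alone sketch only illustrates the encoding (the constant names mirror the patch, the class itself is hypothetical):

#include <atomic>
#include <cstdint>

class rseg_ref_sketch
{
  std::atomic<uint32_t> ref{0};
  static constexpr uint32_t SKIP= 1;         // bit 0: undo truncation pending, skip allocation
  static constexpr uint32_t NEEDS_PURGE= 2;  // bit 1: segment still needs purge
  static constexpr uint32_t REF= 4;          // reference count lives in bits 2..31
public:
  void set_skip_allocation() { ref.fetch_or(SKIP, std::memory_order_relaxed); }
  void set_needs_purge() { ref.fetch_or(NEEDS_PURGE, std::memory_order_relaxed); }
  void acquire() { ref.fetch_add(REF, std::memory_order_relaxed); }
  void release() { ref.fetch_sub(REF, std::memory_order_relaxed); }
  bool is_referenced() const
  { return ref.load(std::memory_order_relaxed) >= REF; }
};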

trx_rseg_t::latch: Replaces trx_rseg_t::mutex.

trx_rseg_t::history_size: Replaces trx_sys_t::rseg_history_len.

trx_sys_t::history_size_approx(): Replaces trx_sys.rseg_history_len
in those places where the exact count does not matter. We must not
acquire any trx_rseg_t::latch while holding index page latches, because
normally the trx_rseg_t::latch is acquired before any page latches.

trx_sys_t::history_exists(): Replaces trx_sys.rseg_history_len!=0
with an approximation.
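
A sketch of what such an approximation can look like, assuming the total is obtained by summing the per-rollback-segment history_size counters without acquiring any trx_rseg_t::latch, so the result may be slightly stale, which is acceptable at these call sites. The stub types and the segment count are illustrative, not the MariaDB definitions:

#include <cstdint>

struct rseg_sketch { uint32_t history_size; };

struct trx_sys_history_sketch
{
  static constexpr unsigned N_RSEGS= 128;
  rseg_sketch rseg_array[N_RSEGS];

  // Approximate length of the purge history: dirty reads, no latches taken.
  uint64_t history_size_approx() const
  {
    uint64_t size= 0;
    for (const rseg_sketch &r : rseg_array)
      size+= r.history_size;
    return size;
  }

  // Approximate emptiness check replacing trx_sys.rseg_history_len != 0.
  bool history_exists() const
  {
    for (const rseg_sketch &r : rseg_array)
      if (r.history_size)
        return true;
    return false;
  }
};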

We remove some unnecessary trx_rseg_t::latch acquisition around
trx_undo_set_state_at_prepare() and trx_undo_set_state_at_finish().
Those operations will only access fields that remain constant
after trx_rseg_t::init().
dr-m committed Jun 23, 2021
1 parent b3e8788 commit 6e12ebd
Showing 18 changed files with 482 additions and 499 deletions.
15 changes: 9 additions & 6 deletions storage/innobase/btr/btr0cur.cc
@@ -1462,8 +1462,9 @@ btr_cur_search_to_nth_level_func(
Free blocks and read IO bandwidth should be prior
for them, when the history list is glowing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys.rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
- && buf_pool.n_pend_reads) {
+ && buf_pool.n_pend_reads
+ && trx_sys.history_size_approx()
+ > BTR_CUR_FINE_HISTORY_LENGTH) {
x_latch_index:
mtr_x_lock_index(index, mtr);
} else if (index->is_spatial()
@@ -2575,8 +2576,9 @@ btr_cur_open_at_index_side(
Free blocks and read IO bandwidth should be prior
for them, when the history list is glowing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys.rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
- && buf_pool.n_pend_reads) {
+ && buf_pool.n_pend_reads
+ && trx_sys.history_size_approx()
+ > BTR_CUR_FINE_HISTORY_LENGTH) {
mtr_x_lock_index(index, mtr);
} else {
mtr_sx_lock_index(index, mtr);
@@ -2898,8 +2900,9 @@ btr_cur_open_at_rnd_pos(
Free blocks and read IO bandwidth should be prior
for them, when the history list is glowing huge. */
if (lock_intention == BTR_INTENTION_DELETE
- && trx_sys.rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
- && buf_pool.n_pend_reads) {
+ && buf_pool.n_pend_reads
+ && trx_sys.history_size_approx()
+ > BTR_CUR_FINE_HISTORY_LENGTH) {
mtr_x_lock_index(index, mtr);
} else {
mtr_sx_lock_index(index, mtr);
10 changes: 3 additions & 7 deletions storage/innobase/handler/ha_innodb.cc
@@ -216,15 +216,15 @@ static my_bool innodb_read_only_compressed;
/** A dummy variable */
static uint innodb_max_purge_lag_wait;

- /** Wait for trx_sys_t::rseg_history_len to be below a limit. */
+ /** Wait for trx_sys.history_size() to be below a limit. */
static void innodb_max_purge_lag_wait_update(THD *thd, st_mysql_sys_var *,
void *, const void *limit)
{
const uint l= *static_cast<const uint*>(limit);
- if (trx_sys.rseg_history_len <= l)
+ if (!trx_sys.history_exceeds(l))
return;
mysql_mutex_unlock(&LOCK_global_system_variables);
- while (trx_sys.rseg_history_len > l)
+ while (trx_sys.history_exceeds(l))
{
if (thd_kill_level(thd))
break;
@@ -520,8 +520,6 @@ mysql_pfs_key_t log_flush_order_mutex_key;
mysql_pfs_key_t recalc_pool_mutex_key;
mysql_pfs_key_t purge_sys_pq_mutex_key;
mysql_pfs_key_t recv_sys_mutex_key;
- mysql_pfs_key_t redo_rseg_mutex_key;
- mysql_pfs_key_t noredo_rseg_mutex_key;
mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
mysql_pfs_key_t rtr_active_mutex_key;
mysql_pfs_key_t rtr_match_mutex_key;
@@ -564,8 +562,6 @@ static PSI_mutex_info all_innodb_mutexes[] = {
PSI_KEY(page_zip_stat_per_index_mutex),
PSI_KEY(purge_sys_pq_mutex),
PSI_KEY(recv_sys_mutex),
- PSI_KEY(redo_rseg_mutex),
- PSI_KEY(noredo_rseg_mutex),
PSI_KEY(srv_innodb_monitor_mutex),
PSI_KEY(srv_misc_tmpfile_mutex),
PSI_KEY(srv_monitor_file_mutex),
2 changes: 1 addition & 1 deletion storage/innobase/include/trx0purge.h
@@ -27,7 +27,7 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0purge_h
#define trx0purge_h

#include "trx0rseg.h"
#include "trx0sys.h"
#include "que0types.h"
#include "srw_lock.h"

183 changes: 105 additions & 78 deletions storage/innobase/include/trx0rseg.h
@@ -24,16 +24,12 @@ Rollback segment
Created 3/26/1996 Heikki Tuuri
*******************************************************/

- #ifndef trx0rseg_h
- #define trx0rseg_h
-
- #include "trx0sys.h"
+ #pragma once
+ #include "trx0types.h"
#include "fut0lst.h"
-
- #ifdef UNIV_PFS_MUTEX
- extern mysql_pfs_key_t redo_rseg_mutex_key;
- extern mysql_pfs_key_t noredo_rseg_mutex_key;
- #endif /* UNIV_PFS_MUTEX */
+ #ifdef WITH_WSREP
+ # include "trx0xa.h"
+ #endif /* WITH_WSREP */

/** Gets a rollback segment header.
@param[in] space space where placed
@@ -73,21 +69,8 @@ trx_rseg_header_create(
/** Initialize or recover the rollback segments at startup. */
dberr_t trx_rseg_array_init();

- /** Free a rollback segment in memory. */
- void
- trx_rseg_mem_free(trx_rseg_t* rseg);
-
- /** Create a persistent rollback segment.
- @param[in] space_id system or undo tablespace id
- @return pointer to new rollback segment
- @retval NULL on failure */
- trx_rseg_t*
- trx_rseg_create(ulint space_id)
- MY_ATTRIBUTE((warn_unused_result));
-
/** Create the temporary rollback segments. */
- void
- trx_temp_rseg_create();
+ void trx_temp_rseg_create();

/* Number of undo log slots in a rollback segment file copy */
#define TRX_RSEG_N_SLOTS (srv_page_size / 16)
@@ -96,51 +79,100 @@ trx_temp_rseg_create();
#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)

/** The rollback segment memory object */
- struct trx_rseg_t {
- /*--------------------------------------------------------*/
- /** rollback segment id == the index of its slot in the trx
- system file copy */
- ulint id;
-
- /** mutex protecting the fields in this struct except id,space,page_no
- which are constant */
- mysql_mutex_t mutex;
-
- /** space where the rollback segment header is placed */
- fil_space_t* space;
-
- /** page number of the rollback segment header */
- uint32_t page_no;
-
- /** current size in pages */
- uint32_t curr_size;
+ struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) trx_rseg_t
+ {
+ /** tablespace containing the rollback segment; constant after init() */
+ fil_space_t *space;
+ /** latch protecting everything except page_no, space */
+ srw_lock_low latch;
+ /** rollback segment header page number; constant after init() */
+ uint32_t page_no;
+ /** length of the TRX_RSEG_HISTORY list (number of transactions) */
+ uint32_t history_size;
+
+ private:
+ /** Reference counter to track rseg allocated transactions,
+ with SKIP and NEEDS_PURGE flags. */
+ std::atomic<uint32_t> ref;
+
+ /** Whether undo tablespace truncation is pending */
+ static constexpr uint32_t SKIP= 1;
+ /** Whether the log segment needs purge */
+ static constexpr uint32_t NEEDS_PURGE= 2;
+ /** Transaction reference count multiplier */
+ static constexpr uint32_t REF= 4;
+
+ uint32_t ref_load() const { return ref.load(std::memory_order_relaxed); }
+ public:
+
+ /** Initialize the fields that are not zero-initialized. */
+ void init(fil_space_t *space, uint32_t page);
+ /** Reinitialize the fields on undo tablespace truncation. */
+ void reinit(uint32_t page);
+ /** Clean up. */
+ void destroy();
+
+ /** Note that undo tablespace truncation was started. */
+ void set_skip_allocation()
+ { ut_ad(is_persistent()); ref.fetch_or(SKIP, std::memory_order_relaxed); }
+ /** Note that undo tablespace truncation was completed. */
+ void clear_skip_allocation()
+ {
+ ut_ad(is_persistent());
+ ut_d(auto r=) ref.fetch_and(~SKIP, std::memory_order_relaxed);
+ ut_ad(r == SKIP);
+ }
+ /** Note that the rollback segment requires purge. */
+ void set_needs_purge()
+ { ref.fetch_or(NEEDS_PURGE, std::memory_order_relaxed); }
+ /** Note that the rollback segment will not require purge. */
+ void clear_needs_purge()
+ { ref.fetch_and(~NEEDS_PURGE, std::memory_order_relaxed); }
+ /** @return whether the segment is marked for undo truncation */
+ bool skip_allocation() const { return ref_load() & SKIP; }
+ /** @return whether the segment needs purge */
+ bool needs_purge() const { return ref_load() & NEEDS_PURGE; }
+ /** Increment the reference count */
+ void acquire()
+ { ut_d(auto r=) ref.fetch_add(REF); ut_ad(!(r & SKIP)); }
+ /** Increment the reference count if possible
+ @retval true if the reference count was incremented
+ @retval false if skip_allocation() holds */
+ bool acquire_if_available()
+ {
+ uint32_t r= 0;
+ while (!ref.compare_exchange_weak(r, r + REF,
+ std::memory_order_relaxed,
+ std::memory_order_relaxed))
+ if (r & SKIP)
+ return false;
+ return true;
+ }

- /*--------------------------------------------------------*/
- /* Fields for undo logs */
- /** List of undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) undo_list;
+ /** Decrement the reference count */
+ void release()
+ {
+ ut_d(const auto r=)
+ ref.fetch_sub(REF, std::memory_order_relaxed);
+ ut_ad(r >= REF);
+ }
+ /** @return whether references exist */
+ bool is_referenced() const { return ref_load() >= REF; }

- /** List of undo log segments cached for fast reuse */
- UT_LIST_BASE_NODE_T(trx_undo_t) undo_cached;
+ /** current size in pages */
+ uint32_t curr_size;

- /*--------------------------------------------------------*/
+ /** List of undo logs (transactions) */
+ UT_LIST_BASE_NODE_T(trx_undo_t) undo_list;
+ /** List of undo log segments cached for fast reuse */
+ UT_LIST_BASE_NODE_T(trx_undo_t) undo_cached;

/** Last not yet purged undo log header; FIL_NULL if all purged */
uint32_t last_page_no;

/** trx_t::no | last_offset << 48 */
uint64_t last_commit_and_offset;

- /** Whether the log segment needs purge */
- bool needs_purge;
-
- /** Reference counter to track rseg allocated transactions. */
- ulint trx_ref_count;
-
- /** If true, then skip allocating this rseg as it reside in
- UNDO-tablespace marked for truncate. */
- bool skip_allocation;
-
/** @return the commit ID of the last committed transaction */
trx_id_t last_trx_no() const
{ return last_commit_and_offset & ((1ULL << 48) - 1); }
@@ -153,24 +185,21 @@ struct trx_rseg_t {
last_commit_and_offset= static_cast<uint64_t>(last_offset) << 48 | trx_no;
}

- /** @return whether the rollback segment is persistent */
- bool is_persistent() const
- {
- ut_ad(space == fil_system.temp_space
- || space == fil_system.sys_space
- || (srv_undo_space_id_start > 0
- && space->id >= srv_undo_space_id_start
- && space->id <= srv_undo_space_id_start
- + TRX_SYS_MAX_UNDO_SPACES));
- ut_ad(space == fil_system.temp_space
- || space == fil_system.sys_space
- || (srv_undo_space_id_start > 0
- && space->id >= srv_undo_space_id_start
- && space->id <= srv_undo_space_id_start
- + srv_undo_tablespaces_open)
- || !srv_was_started);
- return(space->id != SRV_TMP_SPACE_ID);
- }
+ /** @return whether the rollback segment is persistent */
+ bool is_persistent() const
+ {
+ ut_ad(space == fil_system.temp_space || space == fil_system.sys_space ||
+ (srv_undo_space_id_start > 0 &&
+ space->id >= srv_undo_space_id_start &&
+ space->id <= srv_undo_space_id_start + TRX_SYS_MAX_UNDO_SPACES));
+ ut_ad(space == fil_system.temp_space || space == fil_system.sys_space ||
+ !srv_was_started ||
+ (srv_undo_space_id_start > 0 &&
+ space->id >= srv_undo_space_id_start
+ && space->id <= srv_undo_space_id_start +
+ srv_undo_tablespaces_open));
+ return space->id != SRV_TMP_SPACE_ID;
+ }
};

/* Undo log segment slot in a rollback segment header */
@@ -278,5 +307,3 @@ void trx_rseg_update_binlog_offset(buf_block_t *rseg_header, const trx_t *trx,
mtr_t *mtr);

#include "trx0rseg.ic"

#endif

