Skip to content

Commit

Permalink
MDEV-14756 - Remove trx_sys_t::rw_trx_list
Browse files Browse the repository at this point in the history
Use atomic operations when accessing trx_sys_t::max_trx_id. We can't yet
move trx_sys_t::get_new_trx_id() out of the mutex because max_trx_id must be
updated atomically along with trx_sys_t::rw_trx_ids.
  • Loading branch information
Sergey Vojtovich committed Jan 20, 2018
1 parent c6d2842 commit 7078203
Show file tree
Hide file tree
Showing 10 changed files with 118 additions and 159 deletions.
4 changes: 2 additions & 2 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3645,7 +3645,7 @@ static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id)
DBUG_ASSERT(trx->rsegs.m_redo.rseg);

mutex_enter(&trx_sys->mutex);
trx_id_t commit_id = trx_sys_get_new_trx_id();
trx_id_t commit_id = trx_sys->get_new_trx_id();
mutex_exit(&trx_sys->mutex);

return commit_id;
Expand Down Expand Up @@ -19907,7 +19907,7 @@ wsrep_fake_trx_id(
THD *thd) /*!< in: user thread handle */
{
mutex_enter(&trx_sys->mutex);
trx_id_t trx_id = trx_sys_get_new_trx_id();
trx_id_t trx_id = trx_sys->get_new_trx_id();
mutex_exit(&trx_sys->mutex);
WSREP_DEBUG("innodb fake trx id: " TRX_ID_FMT " thd: %s",
trx_id, wsrep_thd_query(thd));
Expand Down
7 changes: 3 additions & 4 deletions storage/innobase/include/lock0lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@ lock_report_trx_id_insanity(
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
trx_id_t max_trx_id); /*!< in: trx_sys_get_max_trx_id() */
trx_id_t max_trx_id); /*!< in: trx_sys->get_max_trx_id() */
/*********************************************************************//**
Prints info of locks for all transactions.
@return FALSE if not able to obtain lock mutex and exits without
Expand Down Expand Up @@ -827,7 +827,6 @@ Set the lock system timeout event. */
void
lock_set_timeout_event();
/*====================*/
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
@return true if ok */
Expand All @@ -837,8 +836,8 @@ lock_check_trx_id_sanity(
trx_id_t trx_id, /*!< in: trx id */
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
MY_ATTRIBUTE((warn_unused_result));
const ulint* offsets); /*!< in: rec_get_offsets(rec, index) */
#ifdef UNIV_DEBUG
/*******************************************************************//**
Check if the transaction holds any locks on the sys tables
or its records.
Expand Down
110 changes: 76 additions & 34 deletions storage/innobase/include/trx0sys.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,21 +132,6 @@ trx_sysf_rseg_set_page_no(
ulint page_no, /*!< in: page number, FIL_NULL if
the slot is reset to unused */
mtr_t* mtr); /*!< in: mtr */
/*****************************************************************//**
Allocates a new transaction id.
@return new, allocated trx id */
UNIV_INLINE
trx_id_t
trx_sys_get_new_trx_id();
/*===================*/
/*****************************************************************//**
Determines the maximum transaction id.
@return maximum currently allocated trx id; will be stale after the
next call to trx_sys_get_new_trx_id() */
UNIV_INLINE
trx_id_t
trx_sys_get_max_trx_id(void);
/*========================*/

#ifdef UNIV_DEBUG
/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
Expand Down Expand Up @@ -419,6 +404,11 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */

/** Size of the doublewrite block in pages */
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE

/** When a trx id which is zero modulo this number (which must be a power of
two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
page is updated */
#define TRX_SYS_TRX_ID_WRITE_MARGIN ((trx_id_t) 256)
/* @} */

trx_t* current_trx();
Expand Down Expand Up @@ -847,20 +837,24 @@ class rw_trx_hash_t

/** The transaction system central memory data structure. */
struct trx_sys_t {
private:
/**
The smallest number not yet assigned as a transaction id or transaction
number. Accessed and updated with atomic operations.
*/

char pad0[CACHE_LINE_SIZE];
trx_id_t m_max_trx_id;
char pad1[CACHE_LINE_SIZE];


public:
TrxSysMutex mutex; /*!< mutex protecting most fields in
this structure except when noted
otherwise */

MVCC* mvcc; /*!< Multi version concurrency control
manager */
volatile trx_id_t
max_trx_id; /*!< The smallest number not yet
assigned as a transaction id or
transaction number. This is declared
volatile because it can be accessed
without holding any mutex during
AC-NL-RO view creation. */
trx_ut_list_t serialisation_list;
/*!< Ordered on trx_t::no of all the
currently active RW transactions */
Expand All @@ -870,7 +864,7 @@ struct trx_sys_t {
#endif /* UNIV_DEBUG */

/** Avoid false sharing */
const char pad2[CACHE_LINE_SIZE];
char pad2[CACHE_LINE_SIZE];
trx_ut_list_t mysql_trx_list; /*!< List of transactions created
for MySQL. All user transactions are
on mysql_trx_list. The rw_trx_hash
Expand All @@ -891,11 +885,11 @@ struct trx_sys_t {
consistent snapshot. */

/** Avoid false sharing */
const char pad3[CACHE_LINE_SIZE];
char pad3[CACHE_LINE_SIZE];
/** Temporary rollback segments */
trx_rseg_t* temp_rsegs[TRX_SYS_N_RSEGS];
/** Avoid false sharing */
const char pad4[CACHE_LINE_SIZE];
char pad4[CACHE_LINE_SIZE];

trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
/*!< Pointer array to rollback
Expand All @@ -910,7 +904,7 @@ struct trx_sys_t {
transactions), protected by
rseg->mutex */

const char rw_trx_hash_pre_pad[CACHE_LINE_SIZE];
char rw_trx_hash_pre_pad[CACHE_LINE_SIZE];


/**
Expand All @@ -919,7 +913,7 @@ struct trx_sys_t {
*/

rw_trx_hash_t rw_trx_hash;
const char rw_trx_hash_post_pad[CACHE_LINE_SIZE];
char rw_trx_hash_post_pad[CACHE_LINE_SIZE];

ulint n_prepared_trx; /*!< Number of transactions currently
in the XA PREPARED state */
Expand All @@ -940,18 +934,64 @@ struct trx_sys_t {
must look at the trx->state to find out if the minimum trx id transaction
itself is active, or already committed.)
@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty
@return the minimum trx id, or m_max_trx_id if the trx list is empty
*/

trx_id_t get_min_trx_id()
{
trx_id_t id= trx_sys_get_max_trx_id();
trx_id_t id= get_max_trx_id();
rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
(get_min_trx_id_callback), &id);
return id;
}


/**
  Reads the current value of the transaction id counter.

  The counter (m_max_trx_id) is fetched with a relaxed atomic 64-bit load.

  @return value of m_max_trx_id at the time of the load; it will be stale
          after the next call to trx_sys->get_new_trx_id()
*/

trx_id_t get_max_trx_id(void)
{
  int64 *counter= reinterpret_cast<int64*>(&m_max_trx_id);
  return static_cast<trx_id_t>(
    my_atomic_load64_explicit(counter, MY_MEMORY_ORDER_RELAXED));
}


/**
  Allocates a new transaction id.

  m_max_trx_id is advanced by one with a relaxed atomic add, and the value
  returned by that add is handed out as the new id.

  VERY important: after the database is started, the value of m_max_trx_id
  is divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, so the margin check below
  evaluates to true the first time this function is called and the trx id
  counter is written to the disk-based header. Thus trx id values will not
  overlap when the database is repeatedly started.

  The caller must own trx_sys->mutex; this is asserted with ut_ad().

  @return new, allocated trx id
*/

trx_id_t get_new_trx_id()
{
  ut_ad(mutex_own(&trx_sys->mutex));

  int64 *counter= reinterpret_cast<int64*>(&m_max_trx_id);
  const trx_id_t new_id= static_cast<trx_id_t>(
    my_atomic_add64_explicit(counter, 1, MY_MEMORY_ORDER_RELAXED));

  /* Persist the counter whenever the allocated id hits the write margin. */
  const bool at_write_margin= (new_id % TRX_SYS_TRX_ID_WRITE_MARGIN) == 0;
  if (UNIV_UNLIKELY(at_write_margin))
  {
    flush_max_trx_id();
  }
  return new_id;
}


/**
  Sets the transaction id counter (m_max_trx_id) to the given value.

  This is a plain assignment, not an atomic operation.
  NOTE(review): presumably only called while no other thread can access
  the counter (e.g. during startup) - confirm against the callers.

  @param value  new value for m_max_trx_id
*/

void init_max_trx_id(trx_id_t value)
{
  this->m_max_trx_id= value;
}


private:
static my_bool get_min_trx_id_callback(rw_trx_hash_element_t *element,
trx_id_t *id)
Expand All @@ -966,12 +1006,14 @@ struct trx_sys_t {
}
return 0;
}
};

/** When a trx id which is zero modulo this number (which must be a power of
two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
page is updated */
#define TRX_SYS_TRX_ID_WRITE_MARGIN ((trx_id_t) 256)

/**
Writes the value of m_max_trx_id to the file based trx system header.
*/

void flush_max_trx_id();
};

/** Test if trx_sys->mutex is owned. */
#define trx_sys_mutex_own() (trx_sys->mutex.is_owned())
Expand Down
61 changes: 0 additions & 61 deletions storage/innobase/include/trx0sys.ic
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ typedef byte trx_sysf_rseg_t;
/* Size of a rollback segment specification slot */
#define TRX_SYS_RSEG_SLOT_SIZE 8

/*****************************************************************//**
Writes the value of max_trx_id to the file based trx system header. */
void
trx_sys_flush_max_trx_id(void);
/*==========================*/

/** Checks if a page address is the trx sys header page.
@param[in] page_id page id
@return true if trx sys header page */
Expand Down Expand Up @@ -191,58 +185,3 @@ trx_write_trx_id(
ut_ad(id > 0);
mach_write_to_6(ptr, id);
}

/** Allocates a new transaction id.
The caller is expected to own the trx_sys mutex; this is asserted with
ut_ad(). The id handed out is the value trx_sys->max_trx_id had before it
was incremented.
@return new, allocated trx id */
UNIV_INLINE
trx_id_t
trx_sys_get_new_trx_id()
{
	/* Note carried over from the original code: wsrep_fake_trx_id
	is said to violate this assertion. */
	ut_ad(trx_sys_mutex_own());

	/* VERY important: after the database is started, the value of
	max_trx_id is divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, so this
	check is true the first time the function is called and the trx id
	value is written to the disk-based header. Thus trx id values
	will not overlap when the database is repeatedly started. */
	const bool	at_write_margin
		= (trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN) == 0;

	if (at_write_margin) {
		trx_sys_flush_max_trx_id();
	}

	return(trx_sys->max_trx_id++);
}

/** Determines the maximum transaction id.
Must be called without owning the trx_sys mutex (asserted with ut_ad()).
@return maximum currently allocated trx id; will be stale after the
next call to trx_sys_get_new_trx_id() */
UNIV_INLINE
trx_id_t
trx_sys_get_max_trx_id(void)
{
	ut_ad(!trx_sys_mutex_own());

#if UNIV_WORD_SIZE >= DATA_TRX_ID_LEN
	/* Dirty read. Callers should be prepared for stale values; the
	value fits in a machine word, so it is read and written
	atomically. */
	return(trx_sys->max_trx_id);
#else
	/* The id is wider than a machine word: take the mutex around
	the read to avoid a torn value. */
	trx_sys_mutex_enter();

	const trx_id_t	snapshot = trx_sys->max_trx_id;

	trx_sys_mutex_exit();

	return(snapshot);
#endif /* UNIV_WORD_SIZE >= DATA_TRX_ID_LEN */
}
20 changes: 7 additions & 13 deletions storage/innobase/lock/lock0lock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ lock_report_trx_id_insanity(
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */
trx_id_t max_trx_id) /*!< in: trx_sys->get_max_trx_id() */
{
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!rec_is_default_row(rec, index));
Expand All @@ -371,11 +371,6 @@ lock_report_trx_id_insanity(
/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
@return true if ok */
#ifdef UNIV_DEBUG

#else
static MY_ATTRIBUTE((warn_unused_result))
#endif
bool
lock_check_trx_id_sanity(
/*=====================*/
Expand All @@ -387,15 +382,14 @@ lock_check_trx_id_sanity(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!rec_is_default_row(rec, index));

trx_id_t max_trx_id = trx_sys_get_max_trx_id();
bool is_ok = trx_id < max_trx_id;
trx_id_t max_trx_id = trx_sys->get_max_trx_id();

if (!is_ok) {
if (trx_id >= max_trx_id) {
lock_report_trx_id_insanity(
trx_id, rec, index, offsets, max_trx_id);
return false;
}

return(is_ok);
return(true);
}

/*********************************************************************//**
Expand Down Expand Up @@ -5215,7 +5209,7 @@ lock_release(
{
lock_t* lock;
ulint count = 0;
trx_id_t max_trx_id = trx_sys_get_max_trx_id();
trx_id_t max_trx_id = trx_sys->get_max_trx_id();

ut_ad(lock_mutex_own());
ut_ad(!trx_mutex_own(trx));
Expand Down Expand Up @@ -5639,7 +5633,7 @@ lock_print_info_summary(
"------------\n", file);

fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
trx_sys_get_max_trx_id());
trx_sys->get_max_trx_id());

fprintf(file,
"Purge done for trx's n:o < " TRX_ID_FMT
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/page/page0page.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2432,7 +2432,7 @@ page_validate(
&& page_is_leaf(page)
&& !page_is_empty(page)) {
trx_id_t max_trx_id = page_get_max_trx_id(page);
trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id();
trx_id_t sys_max_trx_id = trx_sys->get_max_trx_id();

if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) {
ib::error() << "PAGE_MAX_TRX_ID out of bounds: "
Expand Down
4 changes: 2 additions & 2 deletions storage/innobase/read/read0read.cc
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ ReadView::prepare(trx_id_t id)

m_creator_trx_id = id;

m_low_limit_no = m_low_limit_id = trx_sys->max_trx_id;
m_low_limit_no = m_low_limit_id = trx_sys->get_max_trx_id();

if (!trx_sys->rw_trx_ids.empty()) {
copy_trx_ids(trx_sys->rw_trx_ids);
Expand Down Expand Up @@ -580,7 +580,7 @@ MVCC::view_open(ReadView*& view, trx_t* trx)

view->m_closed = false;

if (view->m_low_limit_id == trx_sys_get_max_trx_id()) {
if (view->m_low_limit_id == trx_sys->get_max_trx_id()) {
return;
} else {
view->m_closed = true;
Expand Down
Loading

0 comments on commit 7078203

Please sign in to comment.