Skip to content

Commit

Permalink
Replace trx_sys_t* trx_sys with trx_sys_t trx_sys
Browse files Browse the repository at this point in the history
There is only one transaction system object in InnoDB.
Allocate the storage for it at link time, not at runtime.

lock_rec_fetch_page(): Use the correct fetch mode BUF_GET.
Pages may never be deallocated from a tablespace while
record locks are pointing to them.
  • Loading branch information
dr-m authored and Sergey Vojtovich committed Jan 20, 2018
1 parent 7078203 commit f8882cc
Show file tree
Hide file tree
Showing 26 changed files with 312 additions and 360 deletions.
12 changes: 6 additions & 6 deletions storage/innobase/btr/btr0cur.cc
Expand Up @@ -482,10 +482,10 @@ btr_cur_instant_init_low(dict_index_t* index, mtr_t* mtr)
/* In fact, because we only ever append fields to the 'default
value' record, it is also OK to perform READ UNCOMMITTED and
then ignore any extra fields, provided that
trx_sys->rw_trx_hash.find(DB_TRX_ID). */
trx_sys.rw_trx_hash.find(DB_TRX_ID). */
if (rec_offs_n_fields(offsets) > index->n_fields
&& !trx_sys->rw_trx_hash.find(row_get_rec_trx_id(rec, index,
offsets))) {
&& !trx_sys.rw_trx_hash.find(row_get_rec_trx_id(rec, index,
offsets))) {
goto inconsistent;
}

Expand Down Expand Up @@ -1168,7 +1168,7 @@ btr_cur_search_to_nth_level_func(
Free blocks and read IO bandwidth should be prior
for them, when the history list is growing huge. */
if (lock_intention == BTR_INTENTION_DELETE
&& trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
&& trx_sys.rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else if (dict_index_is_spatial(index)
Expand Down Expand Up @@ -2308,7 +2308,7 @@ btr_cur_open_at_index_side_func(
Free blocks and read IO bandwidth should be prior
for them, when the history list is growing huge. */
if (lock_intention == BTR_INTENTION_DELETE
&& trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
&& trx_sys.rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else {
Expand Down Expand Up @@ -2654,7 +2654,7 @@ btr_cur_open_at_rnd_pos_func(
Free blocks and read IO bandwidth should be prior
for them, when the history list is growing huge. */
if (lock_intention == BTR_INTENTION_DELETE
&& trx_sys->rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
&& trx_sys.rseg_history_len > BTR_CUR_FINE_HISTORY_LENGTH
&& buf_get_n_pending_read_ios()) {
mtr_x_lock(dict_index_get_lock(index), mtr);
} else {
Expand Down
6 changes: 3 additions & 3 deletions storage/innobase/buf/buf0buf.cc
Expand Up @@ -2706,9 +2706,9 @@ buf_pool_resize()
}

lock_mutex_enter();
trx_sys_mutex_enter();
mutex_enter(&trx_sys.mutex);
bool found = false;
for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys.mysql_trx_list);
trx != NULL;
trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
if (trx->state != TRX_STATE_NOT_STARTED
Expand All @@ -2730,7 +2730,7 @@ buf_pool_resize()
stderr, trx);
}
}
trx_sys_mutex_exit();
mutex_exit(&trx_sys.mutex);
lock_mutex_exit();

withdraw_started = ut_time();
Expand Down
36 changes: 15 additions & 21 deletions storage/innobase/handler/ha_innodb.cc
Expand Up @@ -319,7 +319,7 @@ thd_destructor_proxy(void *)
srv_running = NULL;

while (srv_fast_shutdown == 0 &&
(trx_sys_any_active_transactions() ||
(trx_sys.any_active_transactions() ||
(uint)thread_count > srv_n_purge_threads + 1)) {
thd_proc_info(thd, "InnoDB slow shutdown wait");
os_thread_sleep(1000);
Expand Down Expand Up @@ -3160,9 +3160,9 @@ read view to it if there is no read view yet.
Why a deadlock of threads is not possible: the query cache calls this function
at the start of a SELECT processing. Then the calling thread cannot be
holding any InnoDB semaphores. The calling thread is holding the
query cache mutex, and this function will reserve the InnoDB trx_sys->mutex.
query cache mutex, and this function will reserve the InnoDB trx_sys.mutex.
Thus, the 'rank' in sync0mutex.h of the MySQL query cache mutex is above
the InnoDB trx_sys->mutex.
the InnoDB trx_sys.mutex.
@return TRUE if permitted, FALSE if not; note that the value FALSE
does not mean we should invalidate the query cache: invalidation is
called explicitly */
Expand Down Expand Up @@ -3644,9 +3644,9 @@ static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id)
DBUG_ASSERT(t->first->versioned());
DBUG_ASSERT(trx->rsegs.m_redo.rseg);

mutex_enter(&trx_sys->mutex);
trx_id_t commit_id = trx_sys->get_new_trx_id();
mutex_exit(&trx_sys->mutex);
mutex_enter(&trx_sys.mutex);
trx_id_t commit_id = trx_sys.get_new_trx_id();
mutex_exit(&trx_sys.mutex);

return commit_id;
}
Expand Down Expand Up @@ -16225,12 +16225,9 @@ ha_innobase::external_lock(

} else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
&& MVCC::is_view_active(trx->read_view)) {

mutex_enter(&trx_sys->mutex);

trx_sys->mvcc->view_close(trx->read_view, true);

mutex_exit(&trx_sys->mutex);
mutex_enter(&trx_sys.mutex);
trx_sys.mvcc->view_close(trx->read_view, true);
mutex_exit(&trx_sys.mutex);
}
}

Expand Down Expand Up @@ -16896,12 +16893,9 @@ ha_innobase::store_lock(

/* At low transaction isolation levels we let
each consistent read set its own snapshot */

mutex_enter(&trx_sys->mutex);

trx_sys->mvcc->view_close(trx->read_view, true);

mutex_exit(&trx_sys->mutex);
/* Close the read view while holding trx_sys.mutex. */
mutex_enter(&trx_sys.mutex);
trx_sys.mvcc->view_close(trx->read_view, true);
mutex_exit(&trx_sys.mutex);
}
}

Expand Down Expand Up @@ -19906,9 +19900,9 @@ wsrep_fake_trx_id(
handlerton *hton,
THD *thd) /*!< in: user thread handle */
{
mutex_enter(&trx_sys->mutex);
trx_id_t trx_id = trx_sys->get_new_trx_id();
mutex_exit(&trx_sys->mutex);
mutex_enter(&trx_sys.mutex);
trx_id_t trx_id = trx_sys.get_new_trx_id();
mutex_exit(&trx_sys.mutex);
WSREP_DEBUG("innodb fake trx id: " TRX_ID_FMT " thd: %s",
trx_id, wsrep_thd_query(thd));
wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd), trx_id);
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/include/lock0lock.h
Expand Up @@ -609,7 +609,7 @@ lock_report_trx_id_insanity(
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
trx_id_t max_trx_id); /*!< in: trx_sys->get_max_trx_id() */
trx_id_t max_trx_id); /*!< in: trx_sys.get_max_trx_id() */
/*********************************************************************//**
Prints info of locks for all transactions.
@return FALSE if not able to obtain lock mutex and exits without
Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/include/sync0types.h
Expand Up @@ -160,7 +160,7 @@ V
lock_sys_mutex Mutex protecting lock_sys_t
|
V
trx_sys->mutex Mutex protecting trx_sys_t
trx_sys.mutex Mutex protecting trx_sys_t
|
V
Threads mutex Background thread scheduling mutex
Expand Down
78 changes: 32 additions & 46 deletions storage/innobase/include/trx0sys.h
Expand Up @@ -51,9 +51,6 @@ typedef UT_LIST_BASE_NODE_T(trx_t) trx_ut_list_t;
class MVCC;
class ReadView;

/** The transaction system */
extern trx_sys_t* trx_sys;

/** Checks if a page address is the trx sys header page.
@param[in] page_id page id
@return true if trx sys header page */
Expand All @@ -65,11 +62,6 @@ trx_sys_hdr_page(
/** Initialize the transaction system main-memory data structures. */
void trx_sys_init_at_db_start();

/*****************************************************************//**
Creates the trx_sys instance and initializes purge_queue and mutex. */
void
trx_sys_create(void);
/*================*/
/*****************************************************************//**
Creates and initializes the transaction system at the database creation. */
void
Expand Down Expand Up @@ -210,23 +202,11 @@ bool
trx_sys_read_wsrep_checkpoint(XID* xid);
#endif /* WITH_WSREP */

/*****************************************************************//**
Shutdown/Close the transaction system. */
void
trx_sys_close(void);
/*===============*/
/** Create the rollback segments.
@return whether the creation succeeded */
bool
trx_sys_create_rsegs();

/*********************************************************************
Check if there are any active (non-prepared) transactions.
@return total number of active transactions or 0 if none */
ulint
trx_sys_any_active_transactions(void);
/*=================================*/

/** The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0

Expand Down Expand Up @@ -843,12 +823,11 @@ struct trx_sys_t {
number. Accessed and updated with atomic operations.
*/

char pad0[CACHE_LINE_SIZE];
trx_id_t m_max_trx_id;
char pad1[CACHE_LINE_SIZE];
MY_ALIGNED(CACHE_LINE_SIZE) trx_id_t m_max_trx_id;


public:
MY_ALIGNED(CACHE_LINE_SIZE)
TrxSysMutex mutex; /*!< mutex protecting most fields in
this structure except when noted
otherwise */
Expand All @@ -863,8 +842,7 @@ struct trx_sys_t {
transactions which exist or existed */
#endif /* UNIV_DEBUG */

/** Avoid false sharing */
char pad2[CACHE_LINE_SIZE];
MY_ALIGNED(CACHE_LINE_SIZE)
trx_ut_list_t mysql_trx_list; /*!< List of transactions created
for MySQL. All user transactions are
on mysql_trx_list. The rw_trx_hash
Expand All @@ -875,6 +853,7 @@ struct trx_sys_t {
transactions that have not yet been
started in InnoDB. */

MY_ALIGNED(CACHE_LINE_SIZE)
trx_ids_t rw_trx_ids; /*!< Array of Read write transaction IDs
for MVCC snapshot. A ReadView would take
a snapshot of these transactions whose
Expand All @@ -884,13 +863,11 @@ struct trx_sys_t {
to ensure right order of removal and
consistent snapshot. */

/** Avoid false sharing */
char pad3[CACHE_LINE_SIZE];
MY_ALIGNED(CACHE_LINE_SIZE)
/** Temporary rollback segments */
trx_rseg_t* temp_rsegs[TRX_SYS_N_RSEGS];
/** Avoid false sharing */
char pad4[CACHE_LINE_SIZE];

MY_ALIGNED(CACHE_LINE_SIZE)
trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
/*!< Pointer array to rollback
segments; NULL if slot not in use;
Expand All @@ -904,16 +881,13 @@ struct trx_sys_t {
transactions), protected by
rseg->mutex */

char rw_trx_hash_pre_pad[CACHE_LINE_SIZE];


/**
Lock-free hash of in memory read-write transactions.
Works faster when it is on its own cache line (tested).
*/

rw_trx_hash_t rw_trx_hash;
char rw_trx_hash_post_pad[CACHE_LINE_SIZE];
MY_ALIGNED(CACHE_LINE_SIZE) rw_trx_hash_t rw_trx_hash;

ulint n_prepared_trx; /*!< Number of transactions currently
in the XA PREPARED state */
Expand All @@ -927,6 +901,17 @@ struct trx_sys_t {
transactions. We disable query cache
if such transactions exist. */

/**
Constructor.
We only initialise rw_trx_ids here as it is impossible to postpone its
initialisation to create().
*/

trx_sys_t(): rw_trx_ids(ut_allocator<trx_id_t>(mem_key_trx_sys_t_rw_trx_ids))
{}


/**
Returns the minimum trx id in rw trx list.
Expand All @@ -950,7 +935,7 @@ struct trx_sys_t {
Determines the maximum transaction id.
@return maximum currently allocated trx id; will be stale after the
next call to trx_sys->get_new_trx_id()
next call to trx_sys.get_new_trx_id()
*/

trx_id_t get_max_trx_id(void)
Expand All @@ -976,7 +961,7 @@ struct trx_sys_t {

trx_id_t get_new_trx_id()
{
ut_ad(mutex_own(&trx_sys->mutex));
ut_ad(mutex_own(&mutex));
trx_id_t id= static_cast<trx_id_t>(my_atomic_add64_explicit(
reinterpret_cast<int64*>(&m_max_trx_id), 1, MY_MEMORY_ORDER_RELAXED));

Expand All @@ -992,6 +977,16 @@ struct trx_sys_t {
}


/** Create the instance */
void create();

/** Close the transaction system on shutdown */
void close();

/** @return total number of active (non-prepared) transactions */
ulint any_active_transactions();


private:
static my_bool get_min_trx_id_callback(rw_trx_hash_element_t *element,
trx_id_t *id)
Expand All @@ -1015,18 +1010,9 @@ struct trx_sys_t {
void flush_max_trx_id();
};

/** Test if trx_sys->mutex is owned. */
#define trx_sys_mutex_own() (trx_sys->mutex.is_owned())

/** Acquire the trx_sys->mutex. */
#define trx_sys_mutex_enter() do { \
mutex_enter(&trx_sys->mutex); \
} while (0)

/** Release the trx_sys->mutex. */
#define trx_sys_mutex_exit() do { \
trx_sys->mutex.exit(); \
} while (0)
/** The transaction system */
extern trx_sys_t trx_sys;

#include "trx0sys.ic"

Expand Down

0 comments on commit f8882cc

Please sign in to comment.