Skip to content

Commit

Permalink
Implement VATS both in InnoDB and XtraDB. Add configuration options f…
Browse files Browse the repository at this point in the history
…or it in both of them.
  • Loading branch information
sensssz committed Oct 12, 2016
1 parent ed4a6f1 commit 6100f59
Show file tree
Hide file tree
Showing 6 changed files with 362 additions and 28 deletions.
29 changes: 29 additions & 0 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,22 @@ static TYPELIB innodb_checksum_algorithm_typelib = {
NULL
};

/** Possible values of the parameter innodb_lock_schedule_algorithm */
static const char* innodb_lock_schedule_algorithm_names[] = {
"fcfs",
"vats",
NullS
};

/** Used to define an enumerate type of the system variable
innodb_lock_schedule_algorithm. */
static TYPELIB innodb_lock_schedule_algorithm_typelib = {
array_elements(innodb_lock_schedule_algorithm_names) - 1,
"innodb_lock_schedule_algorithm_typelib",
innodb_lock_schedule_algorithm_names,
NULL
};

/* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each
Expand Down Expand Up @@ -19013,6 +19029,18 @@ static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
NULL, NULL, 120, 1, 127, 0);
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */

static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
PLUGIN_VAR_RQCMDARG,
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
" grant the locks in First-Come-First-Served order;"
" VATS"
" use the Variance-Aware-Transaction-Scheduling algorithm, which"
" uses an Eldest-Transaction-First heuristic.",
NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
&innodb_lock_schedule_algorithm_typelib);

static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
Expand Down Expand Up @@ -19828,6 +19856,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(ft_sort_pll_degree),
MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE
Expand Down
9 changes: 9 additions & 0 deletions storage/innobase/include/lock0lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ Created 5/7/1996 Heikki Tuuri
extern ibool lock_print_waits;
#endif /* UNIV_DEBUG */

/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
setting innodb_lock_schedule_algorithm. */
enum innodb_lock_schedule_algorithm_t {
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, /*!< First Come First Served */
INNODB_LOCK_SCHEDULE_ALGORITHM_VATS /*!< Variance-Aware-Transaction-Scheduling */
};

extern ulong innodb_lock_schedule_algorithm;

/*********************************************************************//**
Gets the size of a lock struct.
@return size in bytes */
Expand Down
158 changes: 144 additions & 14 deletions storage/innobase/lock/lock0lock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ bitmap */

#define LOCK_PAGE_BITMAP_MARGIN 64

/** Lock scheduling algorithm */
ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;

/* An explicit record lock affects both the record and the gap before it.
An implicit x-lock does not affect the gap, it only locks the index
record from read or update.
Expand Down Expand Up @@ -1982,6 +1985,72 @@ wsrep_print_wait_locks(
}
#endif /* WITH_WSREP */

/*********************************************************************//**
Check if lock1 has higher priority than lock2.
NULL has lowest priority.
If neither of them is wait lock, the first one has higher priority.
If only one of them is a wait lock, it has lower priority.
Otherwise, the one with an older transaction has higher priority.
@returns true if lock1 has higher priority, false otherwise. */
bool
has_higher_priority(
lock_t *lock1,
lock_t *lock2)
{
if (lock1 == NULL) {
return false;
} else if (lock2 == NULL) {
return true;
}
if (!lock_get_wait(lock1)) {
return true;
} else if (!lock_get_wait(lock2)) {
return false;
}
return lock1->trx->start_time < lock2->trx->start_time;
}

/*********************************************************************//**
Insert a lock to the hash list according to the mode (whether it is a wait lock)
and the age of the transaction the it is associated with.
If the lock is not a wait lock, insert it to the head of the hash list.
Otherwise, insert it to the middle of the wait locks according to the age of the
transaciton.
*/
static
void
lock_rec_insert_by_trx_age(
lock_t *in_lock, /*!< in: lock to be insert */
bool wait) /*!< in: whether it's a wait lock */
{
ulint space;
ulint page_no;
ulint rec_fold;
hash_cell_t cell;
lock_t* node;
lock_t* next;

space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);
cell = hash_get_nth_cell(lock_sys->rec_hash,
hash_calc_hash(rec_fold, lock_sys->rec_hash));

node = (lock_t *) cell->node;
// If in_lock is not a wait lock, we insert it to the head of the list.
if (node == NULL || !wait || has_higher_priority(in_lock, node)) {
cell->node = in_lock;
in_lock->hash = node;
return;
}
while (node != NULL && has_higher_priority((lock_t *) node->hash, in_lock)) {
node = (lock_t *) node->hash;
}
next = (lock_t *) node->hash;
node->hash = in_lock;
in_lock->hash = next;
}

/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility!
Expand Down Expand Up @@ -2144,13 +2213,19 @@ lock_rec_create(
return(lock);
}
trx_mutex_exit(c_lock->trx);
} else if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
lock_rec_insert_by_trx_age(lock, type_mode & LOCK_WAIT);
} else {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
lock_rec_fold(space, page_no), lock);
}
#else
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS) {
lock_rec_insert_by_trx_age(lock, type_mode & LOCK_WAIT);
} else {
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
}
#endif /* WITH_WSREP */

if (!caller_owns_trx_mutex) {
Expand Down Expand Up @@ -2822,6 +2897,27 @@ lock_rec_cancel(
trx_mutex_exit(lock->trx);
}

/*************************************************************//**
Move the lock to the head of the hash list. */
static
void
lock_rec_move_to_front(
lock_t *lock_to_move, /*!< in: lock to be moved */
ulint rec_fold) /*!< in: rec fold of the lock */
{
if (lock_to_move != NULL)
{
// Move the target lock to the head of the list
hash_cell_t* cell = hash_get_nth_cell(lock_sys->rec_hash,
hash_calc_hash(rec_fold, lock_sys->rec_hash));
if (lock_to_move != cell->node) {
lock_t *next = (lock_t *) cell->node;
cell->node = lock_to_move;
lock_to_move->hash = next;
}
}
}

/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
Expand All @@ -2839,7 +2935,9 @@ lock_rec_dequeue_from_page(
{
ulint space;
ulint page_no;
ulint rec_fold
lock_t* lock;
lock_t* previous = NULL;
trx_lock_t* trx_lock;

ut_ad(lock_mutex_own());
Expand All @@ -2850,6 +2948,7 @@ lock_rec_dequeue_from_page(

space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
rec_fold = lock_rec_fold(space, page_no);

in_lock->index->table->n_rec_locks--;

Expand All @@ -2861,20 +2960,51 @@ lock_rec_dequeue_from_page(
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
MONITOR_DEC(MONITOR_NUM_RECLOCK);

/* Check if waiting locks in the queue can now be granted: grant
locks if there are no conflicting locks ahead. Stop at the first
X lock that is waiting or has been granted. */
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
/* Check if waiting locks in the queue can now be granted: grant
locks if there are no conflicting locks ahead. Stop at the first
X lock that is waiting or has been granted. */

for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {
for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;
lock = lock_rec_get_next_on_page(lock)) {

if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {

/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
}
}
} else {
/* Grant locks if there are no conflicting locks ahead.
Move granted locks to the head of the list. */
for (lock = lock_rec_get_first_on_page_addr(space, page_no);
lock != NULL;) {

/* If the lock is a wait lock on this page, and it does not need to wait. */
if ((lock->un_member.rec_lock.space == space)
&& (lock->un_member.rec_lock.page_no == page_no)
&& lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {

lock_grant(lock);

if (previous != NULL) {
/* Move the lock to the head of the list. */
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
lock_rec_move_to_front(lock, rec_fold);
} else {
/* Already at the head of the list. */
previous = lock;
}
/* Move on to the next lock. */
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
} else {
previous = lock;
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
}
}
}
}
Expand Down
30 changes: 30 additions & 0 deletions storage/xtradb/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,23 @@ static TYPELIB innodb_empty_free_list_algorithm_typelib = {
NULL
};

/** Possible values of the parameter innodb_lock_schedule_algorithm */
static const char* innodb_lock_schedule_algorithm_names[] = {
"fcfs",
"vats",
NullS
};

/** Used to define an enumerate type of the system variable
innodb_lock_schedule_algorithm. */
static TYPELIB innodb_lock_schedule_algorithm_typelib = {
array_elements(innodb_lock_schedule_algorithm_names) - 1,
"innodb_lock_schedule_algorithm_typelib",
innodb_lock_schedule_algorithm_names,
NULL
};


/* The following counter is used to convey information to InnoDB
about server activity: in case of normal DML ops it is not
sensible to call srv_active_wake_master_thread after each
Expand Down Expand Up @@ -20473,6 +20490,18 @@ static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm,
innodb_srv_empty_free_list_algorithm_validate, NULL, SRV_EMPTY_FREE_LIST_BACKOFF,
&innodb_empty_free_list_algorithm_typelib);

static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
PLUGIN_VAR_RQCMDARG,
"The algorithm Innodb uses for deciding which locks to grant next when"
" a lock is released. Possible values are"
" FCFS"
" grant the locks in First-Come-First-Served order;"
" VATS"
" use the Variance-Aware-Transaction-Scheduling algorithm, which"
" uses an Eldest-Transaction-First heuristic.",
NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
&innodb_lock_schedule_algorithm_typelib);

static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
Expand Down Expand Up @@ -21366,6 +21395,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(ft_sort_pll_degree),
MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(lock_schedule_algorithm),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE
Expand Down
9 changes: 9 additions & 0 deletions storage/xtradb/include/lock0lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ Created 5/7/1996 Heikki Tuuri
extern ibool lock_print_waits;
#endif /* UNIV_DEBUG */

/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
setting innodb_lock_schedule_algorithm. */
enum innodb_lock_schedule_algorithm_t {
INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, /*!< First Come First Served */
INNODB_LOCK_SCHEDULE_ALGORITHM_VATS /*!< Variance-Aware-Transaction-Scheduling */
};

extern ulong innodb_lock_schedule_algorithm;

extern ulint srv_n_lock_deadlock_count;

/*********************************************************************//**
Expand Down
Loading

0 comments on commit 6100f59

Please sign in to comment.