Skip to content

Commit cc49f00

Browse files
committed
Move InnoDB/XtraDB to async deadlock kill for parallel replication.
In 10.2, use the thd_rpl_deadlock_check() API. This way, all the locking hacks around thd_report_wait_for() can be removed. Now parallel replication deadlock kill happens asynchronously, from the slave background thread. In InnoDB, remove also the buffering of wait reports, to simplify the code, as this is no longer needed when the locking issues are gone. In XtraDB, the buffering is kept for now. This is just because presumably XtraDB will eventually be updated to MySQL 5.7-based InnoDB as well, so there is little need to modify the existing code only for clean-up purposes. The old synchronous function thd_report_wait_for() is no longer used and removed in this patch. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
1 parent 4d3e366 commit cc49f00

File tree

7 files changed

+41
-211
lines changed

7 files changed

+41
-211
lines changed

sql/sql_class.cc

Lines changed: 17 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -4598,19 +4598,20 @@ extern "C" int thd_rpl_is_parallel(const MYSQL_THD thd)
45984598
}
45994599

46004600
/*
4601-
This function can optionally be called to check if thd_report_wait_for()
4601+
This function can optionally be called to check if thd_rpl_deadlock_check()
46024602
needs to be called for waits done by a given transaction.
46034603
4604-
If this function returns false for a given thd, there is no need to do any
4605-
calls to thd_report_wait_for() on that thd.
4604+
If this function returns false for a given thd, there is no need to do
4605+
any calls to thd_rpl_deadlock_check() on that thd.
46064606
4607-
This call is optional; it is safe to call thd_report_wait_for() in any case.
4608-
This call can be used to save some redundant calls to thd_report_wait_for()
4609-
if desired. (This is unlikely to matter much unless there are _lots_ of
4610-
waits to report, as the overhead of thd_report_wait_for() is small).
4607+
This call is optional; it is safe to call thd_rpl_deadlock_check() in
4608+
any case. This call can be used to save some redundant calls to
4609+
thd_rpl_deadlock_check() if desired. (This is unlikely to matter much
4610+
unless there are _lots_ of waits to report, as the overhead of
4611+
thd_rpl_deadlock_check() is small).
46114612
*/
46124613
extern "C" int
4613-
thd_need_wait_for(const MYSQL_THD thd)
4614+
thd_need_wait_reports(const MYSQL_THD thd)
46144615
{
46154616
rpl_group_info *rgi;
46164617

@@ -4625,75 +4626,9 @@ thd_need_wait_for(const MYSQL_THD thd)
46254626
}
46264627

46274628
/*
4628-
Used by InnoDB/XtraDB to report that one transaction THD is about to go to
4629-
wait for a transactional lock held by another transactions OTHER_THD.
4630-
4631-
This is used for parallel replication, where transactions are required to
4632-
commit in the same order on the slave as they did on the master. If the
4633-
transactions on the slave encounters lock conflicts on the slave that did
4634-
not exist on the master, this can cause deadlocks.
4635-
4636-
Normally, such conflicts will not occur, because the same conflict would
4637-
have prevented the two transactions from committing in parallel on the
4638-
master, thus preventing them from running in parallel on the slave in the
4639-
first place. However, it is possible in case when the optimizer chooses a
4640-
different plan on the slave than on the master (eg. table scan instead of
4641-
index scan).
4642-
4643-
InnoDB/XtraDB reports lock waits using this call. If a lock wait causes a
4644-
deadlock with the pre-determined commit order, we kill the later transaction,
4645-
and later re-try it, to resolve the deadlock.
4646-
4647-
This call need only receive reports about waits for locks that will remain
4648-
until the holding transaction commits. InnoDB/XtraDB auto-increment locks
4649-
are released earlier, and so need not be reported. (Such false positives are
4650-
not harmful, but could lead to unnecessary kill and retry, so best avoided).
4651-
*/
4652-
extern "C" void
4653-
thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd)
4654-
{
4655-
rpl_group_info *rgi;
4656-
rpl_group_info *other_rgi;
4657-
4658-
if (!thd)
4659-
return;
4660-
DEBUG_SYNC(thd, "thd_report_wait_for");
4661-
thd->transaction.stmt.mark_trans_did_wait();
4662-
if (!other_thd)
4663-
return;
4664-
binlog_report_wait_for(thd, other_thd);
4665-
rgi= thd->rgi_slave;
4666-
other_rgi= other_thd->rgi_slave;
4667-
if (!rgi || !other_rgi)
4668-
return;
4669-
if (!rgi->is_parallel_exec)
4670-
return;
4671-
if (rgi->rli != other_rgi->rli)
4672-
return;
4673-
if (!rgi->gtid_sub_id || !other_rgi->gtid_sub_id)
4674-
return;
4675-
if (rgi->current_gtid.domain_id != other_rgi->current_gtid.domain_id)
4676-
return;
4677-
if (rgi->gtid_sub_id > other_rgi->gtid_sub_id)
4678-
return;
4679-
/*
4680-
This transaction is about to wait for another transaction that is required
4681-
by replication binlog order to commit after. This would cause a deadlock.
4682-
4683-
So send a kill to the other transaction, with a temporary error; this will
4684-
cause replication to rollback (and later re-try) the other transaction,
4685-
releasing the lock for this transaction so replication can proceed.
4686-
*/
4687-
other_rgi->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED;
4688-
mysql_mutex_lock(&other_thd->LOCK_thd_data);
4689-
other_thd->awake(KILL_CONNECTION);
4690-
mysql_mutex_unlock(&other_thd->LOCK_thd_data);
4691-
}
4692-
4693-
/*
4694-
Used by storage engines (currently TokuDB) to report that one transaction
4695-
THD is about to go to wait for a transactional lock held by another
4696-
transactions OTHER_THD.
4629+
Used by storage engines (currently TokuDB and InnoDB/XtraDB) to report that
4630+
one transaction THD is about to go to wait for a transactional lock held by
4631+
another transaction OTHER_THD.
46974632
46984633
This is used for parallel replication, where transactions are required to
46994634
commit in the same order on the slave as they did on the master. If the
@@ -4708,9 +4643,9 @@ thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd)
47084643
chooses a different plan on the slave than on the master (eg. table scan
47094644
instead of index scan).
47104645
4711-
InnoDB/XtraDB reports lock waits using this call. If a lock wait causes a
4712-
deadlock with the pre-determined commit order, we kill the later transaction,
4713-
and later re-try it, to resolve the deadlock.
4646+
Storage engines report lock waits using this call. If a lock wait causes a
4647+
deadlock with the pre-determined commit order, we kill the later
4648+
transaction, and later re-try it, to resolve the deadlock.
47144649
47154650
This call need only receive reports about waits for locks that will remain
47164651
until the holding transaction commits. InnoDB/XtraDB auto-increment locks,
@@ -4801,8 +4736,8 @@ thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd)
48014736
48024737
Calling this function is just an optimisation to avoid unnecessary
48034738
deadlocks. If it was not used, a gap lock would be set that could eventually
4804-
cause a deadlock; the deadlock would be caught by thd_report_wait_for() and
4805-
the transaction T2 killed and rolled back (and later re-tried).
4739+
cause a deadlock; the deadlock would be caught by thd_rpl_deadlock_check()
4740+
and the transaction T2 killed and rolled back (and later re-tried).
48064741
*/
48074742
extern "C" int
48084743
thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)

storage/innobase/handler/ha_innodb.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5685,8 +5685,7 @@ innobase_kill_query(
56855685
wsrep_thd_is_BF(current_thd, FALSE));
56865686
}
56875687

5688-
if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
5689-
trx->abort_type == TRX_SERVER_ABORT) {
5688+
if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
56905689
ut_ad(!lock_mutex_own());
56915690
lock_mutex_enter();
56925691
lock_mutex_taken = true;

storage/innobase/include/trx0trx.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -842,8 +842,7 @@ lock_sys->mutex and sometimes by trx->mutex. */
842842

843843
typedef enum {
844844
TRX_SERVER_ABORT = 0,
845-
TRX_WSREP_ABORT = 1,
846-
TRX_REPLICATION_ABORT = 2
845+
TRX_WSREP_ABORT = 1
847846
} trx_abort_t;
848847

849848

storage/innobase/lock/lock0lock.cc

Lines changed: 14 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,8 @@ static const ulint TABLE_LOCK_CACHE = 8;
7070
/** Size in bytes, of the table lock instance */
7171
static const ulint TABLE_LOCK_SIZE = sizeof(ib_lock_t);
7272

73-
/* Buffer to collect THDs to report waits for. */
74-
struct thd_wait_reports {
75-
struct thd_wait_reports *next; /*!< List link */
76-
ulint used; /*!< How many elements in waitees[] */
77-
trx_t *waitees[64]; /*!< Trxs for thd_report_wait_for() */
78-
};
79-
80-
extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd);
81-
extern "C" int thd_need_wait_for(const MYSQL_THD thd);
73+
extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd);
74+
extern "C" int thd_need_wait_reports(const MYSQL_THD thd);
8275
extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
8376

8477
/** Deadlock checker. */
@@ -109,15 +102,15 @@ class DeadlockChecker {
109102
const trx_t* trx,
110103
const lock_t* wait_lock,
111104
ib_uint64_t mark_start,
112-
struct thd_wait_reports* waitee_buf_ptr)
105+
bool report_waiters)
113106
:
114107
m_cost(),
115108
m_start(trx),
116109
m_too_deep(),
117110
m_wait_lock(wait_lock),
118111
m_mark_start(mark_start),
119112
m_n_elems(),
120-
m_waitee_ptr(waitee_buf_ptr)
113+
m_report_waiters(report_waiters)
121114
{
122115
}
123116

@@ -276,8 +269,8 @@ class DeadlockChecker {
276269
/** This is to avoid malloc/free calls. */
277270
static state_t s_states[MAX_STACK_SIZE];
278271

279-
/** Buffer to collect THDs to report waits for. */
280-
struct thd_wait_reports* m_waitee_ptr;
272+
/** Set if thd_rpl_deadlock_check() should be called for waits. */
273+
bool m_report_waiters;
281274
};
282275

283276
/** Counter to mark visited nodes during deadlock search. */
@@ -286,11 +279,6 @@ ib_uint64_t DeadlockChecker::s_lock_mark_counter = 0;
286279
/** The stack used for deadlock searches. */
287280
DeadlockChecker::state_t DeadlockChecker::s_states[MAX_STACK_SIZE];
288281

289-
extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd);
290-
extern "C" int thd_need_wait_for(const MYSQL_THD thd);
291-
extern "C"
292-
int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
293-
294282
#ifdef UNIV_DEBUG
295283
/*********************************************************************//**
296284
Validates the lock system.
@@ -2074,14 +2062,6 @@ RecLock::add_to_waitq(const lock_t* wait_for, const lock_prdt_t* prdt)
20742062
dberr_t err = deadlock_check(lock);
20752063

20762064
ut_ad(trx_mutex_own(m_trx));
2077-
2078-
/* m_trx->mysql_thd is NULL if it's an internal trx. So current_thd is used */
2079-
if (err == DB_LOCK_WAIT) {
2080-
ut_ad(wait_for && wait_for->trx);
2081-
wait_for->trx->abort_type = TRX_REPLICATION_ABORT;
2082-
thd_report_wait_for(current_thd, wait_for->trx->mysql_thd);
2083-
wait_for->trx->abort_type = TRX_SERVER_ABORT;
2084-
}
20852065
return(err);
20862066
}
20872067

@@ -7968,27 +7948,11 @@ DeadlockChecker::search()
79687948
layer. These locks are released before commit, so they
79697949
can not cause deadlocks with binlog-fixed commit
79707950
order. */
7971-
if (m_waitee_ptr &&
7951+
if (m_report_waiters &&
79727952
(lock_get_type_low(lock) != LOCK_TABLE ||
79737953
lock_get_mode(lock) != LOCK_AUTO_INC)) {
7974-
if (m_waitee_ptr->used ==
7975-
sizeof(m_waitee_ptr->waitees) /
7976-
sizeof(m_waitee_ptr->waitees[0])) {
7977-
m_waitee_ptr->next =
7978-
(struct thd_wait_reports *)
7979-
ut_malloc_nokey(sizeof(*m_waitee_ptr));
7980-
m_waitee_ptr = m_waitee_ptr->next;
7981-
7982-
if (!m_waitee_ptr) {
7983-
m_too_deep = true;
7984-
return (m_start);
7985-
}
7986-
7987-
m_waitee_ptr->next = NULL;
7988-
m_waitee_ptr->used = 0;
7989-
}
7990-
7991-
m_waitee_ptr->waitees[m_waitee_ptr->used++] = lock->trx;
7954+
thd_rpl_deadlock_check(m_start->mysql_thd,
7955+
lock->trx->mysql_thd);
79927956
}
79937957

79947958
if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
@@ -8068,47 +8032,6 @@ DeadlockChecker::trx_rollback()
80688032
trx_mutex_exit(trx);
80698033
}
80708034

8071-
static
8072-
void
8073-
lock_report_waiters_to_mysql(
8074-
/*=======================*/
8075-
struct thd_wait_reports* waitee_buf_ptr, /*!< in: set of trxs */
8076-
THD* mysql_thd, /*!< in: THD */
8077-
const trx_t* victim_trx) /*!< in: Trx selected
8078-
as deadlock victim, if
8079-
any */
8080-
{
8081-
struct thd_wait_reports* p;
8082-
struct thd_wait_reports* q;
8083-
ulint i;
8084-
8085-
p = waitee_buf_ptr;
8086-
while (p) {
8087-
i = 0;
8088-
while (i < p->used) {
8089-
trx_t *w_trx = p->waitees[i];
8090-
/* There is no need to report waits to a trx already
8091-
selected as a victim. */
8092-
if (w_trx != victim_trx) {
8093-
/* If thd_report_wait_for() decides to kill the
8094-
transaction, then we will get a call back into
8095-
innobase_kill_query. We mark this by setting
8096-
current_lock_mutex_owner, so we can avoid trying
8097-
to recursively take lock_sys->mutex. */
8098-
w_trx->abort_type = TRX_REPLICATION_ABORT;
8099-
thd_report_wait_for(mysql_thd, w_trx->mysql_thd);
8100-
w_trx->abort_type = TRX_SERVER_ABORT;
8101-
}
8102-
++i;
8103-
}
8104-
q = p->next;
8105-
if (p != waitee_buf_ptr) {
8106-
ut_free(p);
8107-
}
8108-
p = q;
8109-
}
8110-
}
8111-
81128035
/** Checks if a joining lock request results in a deadlock. If a deadlock is
81138036
found this function will resolve the deadlock by choosing a victim transaction
81148037
and rolling it back. It will attempt to resolve all deadlocks. The returned
@@ -8127,36 +8050,20 @@ DeadlockChecker::check_and_resolve(const lock_t* lock, const trx_t* trx)
81278050
ut_ad(!srv_read_only_mode);
81288051

81298052
const trx_t* victim_trx;
8130-
struct thd_wait_reports waitee_buf;
8131-
struct thd_wait_reports*waitee_buf_ptr;
8132-
THD* start_mysql_thd;
8053+
THD* start_mysql_thd;
8054+
bool report_waits = false;
81338055

81348056
start_mysql_thd = trx->mysql_thd;
81358057

8136-
if (start_mysql_thd && thd_need_wait_for(start_mysql_thd)) {
8137-
waitee_buf_ptr = &waitee_buf;
8138-
} else {
8139-
waitee_buf_ptr = NULL;
8140-
}
8058+
if (start_mysql_thd && thd_need_wait_reports(start_mysql_thd))
8059+
report_waits = true;
81418060

81428061
/* Try and resolve as many deadlocks as possible. */
81438062
do {
8144-
if (waitee_buf_ptr) {
8145-
waitee_buf_ptr->next = NULL;
8146-
waitee_buf_ptr->used = 0;
8147-
}
8148-
8149-
DeadlockChecker checker(trx, lock, s_lock_mark_counter, waitee_buf_ptr);
8063+
DeadlockChecker checker(trx, lock, s_lock_mark_counter, report_waits);
81508064

81518065
victim_trx = checker.search();
81528066

8153-
/* Report waits to upper layer, as needed. */
8154-
if (waitee_buf_ptr) {
8155-
lock_report_waiters_to_mysql(waitee_buf_ptr,
8156-
start_mysql_thd,
8157-
victim_trx);
8158-
}
8159-
81608067
/* Search too deep, we rollback the joining transaction only
81618068
if it is possible to rollback. Otherwise we rollback the
81628069
transaction that is holding the lock that the joining

storage/xtradb/handler/ha_innodb.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5441,8 +5441,7 @@ innobase_kill_connection(
54415441
wsrep_thd_is_BF(current_thd, FALSE),
54425442
lock_get_info(trx->lock.wait_lock).c_str());
54435443

5444-
if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
5445-
trx->abort_type == TRX_SERVER_ABORT) {
5444+
if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
54465445
ut_ad(!lock_mutex_own());
54475446
lock_mutex_enter();
54485447
}
@@ -5462,8 +5461,7 @@ innobase_kill_connection(
54625461
trx_mutex_exit(trx);
54635462
}
54645463

5465-
if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
5466-
trx->abort_type == TRX_SERVER_ABORT) {
5464+
if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
54675465
lock_mutex_exit();
54685466
}
54695467
}

storage/xtradb/include/trx0trx.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -707,8 +707,7 @@ lock_sys->mutex and sometimes by trx->mutex. */
707707

708708
typedef enum {
709709
TRX_SERVER_ABORT = 0,
710-
TRX_WSREP_ABORT = 1,
711-
TRX_REPLICATION_ABORT = 2
710+
TRX_WSREP_ABORT = 1
712711
} trx_abort_t;
713712

714713
struct trx_t{

0 commit comments

Comments
 (0)