Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MDEV-29622 Wrong assertions in lock_cancel_waiting_and_release() for …
…deadlock resolving caller Suppose we have two transactions, trx 1 and trx 2. trx 2 does deadlock resolving from lock_wait(), it sets victim->lock.was_chosen_as_deadlock_victim=true for trx 1, but has not yet invoked lock_cancel_waiting_and_release(). trx 1 checks the flag in lock_trx_handle_wait(), and starts rollback from row_mysql_handle_errors(). It can change trx->lock.wait_thr and trx->state as it holds trx_t::mutex, but trx 2 has not yet requested it, as lock_cancel_waiting_and_release() has not yet been called. After that trx 1 tries to release locks in trx_t::rollback_low(), invoking trx_t::rollback_finish(). lock_release() is blocked on try to acquire lock_sys.rd_lock(SRW_LOCK_CALL) in lock_release_try(), as lock_sys is blocked by trx 2, as deadlock resolution works under lock_sys.wr_lock(SRW_LOCK_CALL), see Deadlock::report() for details. trx 2 executes lock_cancel_waiting_and_release() for deadlock victim, i. e. for trx 1. lock_cancel_waiting_and_release() contains some trx->lock.wait_thr and trx->state assertions, which will fail, because trx 1 has changed them during rollback execution. So, according to the above scenario, it's legal to have trx->lock.wait_thr==0 and trx->state!=TRX_STATE_ACTIVE in lock_cancel_waiting_and_release(), if it was invoked from Deadlock::report(), and the fix is just in the assertion conditions changing. The fix is just in changing assertion condition. There is also lock_wait() cleanup around trx->error_state. If trx->error_state can be changed not by the owned thread, it must be protected with lock_sys.wait_mutex, as lock_wait() uses trx->lock.cond along with that mutex. Also if trx->error_state was changed before lock_sys.wait_mutex acquision, then it could be reset with the following code, what is wrong. Also we need to check trx->error_state before entering waiting loop, otherwise it can be the case when trx->error_state was set before lock_sys.wait_mutex acquision, but the thread will be waiting on trx->lock.cond.
- Loading branch information
1 parent
acebe35
commit 9c04d66
Showing
4 changed files
with
124 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
connect suspend_purge,localhost,root,,; | ||
START TRANSACTION WITH CONSISTENT SNAPSHOT; | ||
connection default; | ||
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; | ||
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; | ||
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); | ||
INSERT INTO t2 VALUES (10), (20), (30); | ||
BEGIN; | ||
UPDATE t2 SET a = a + 100; | ||
SELECT * FROM t WHERE a = 20 FOR UPDATE; | ||
a b | ||
20 20 | ||
connect con_2,localhost,root,,; | ||
SET TRANSACTION ISOLATION LEVEL READ COMMITTED; | ||
SET DEBUG_SYNC = 'lock_trx_handle_wait_enter SIGNAL upd_locked WAIT_FOR upd_cont'; | ||
SET DEBUG_SYNC = 'trx_t_release_locks_enter SIGNAL sel_cont WAIT_FOR upd_cont_2'; | ||
BEGIN; | ||
UPDATE t SET b = 100; | ||
connection default; | ||
SET DEBUG_SYNC="now WAIT_FOR upd_locked"; | ||
SET DEBUG_SYNC="deadlock_report_before_lock_releasing SIGNAL upd_cont WAIT_FOR sel_cont"; | ||
SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL sel_before_suspend"; | ||
SELECT * FROM t WHERE a = 10 FOR UPDATE;; | ||
connect con_3,localhost,root,,; | ||
SET DEBUG_SYNC="now WAIT_FOR sel_before_suspend"; | ||
SET DEBUG_SYNC="now SIGNAL upd_cont_2"; | ||
disconnect con_3; | ||
connection con_2; | ||
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction | ||
disconnect con_2; | ||
connection default; | ||
a b | ||
10 10 | ||
SET DEBUG_SYNC = 'RESET'; | ||
DROP TABLE t; | ||
DROP TABLE t2; | ||
disconnect suspend_purge; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
--source include/have_innodb.inc | ||
--source include/have_debug_sync.inc | ||
--source include/count_sessions.inc | ||
|
||
--connect(suspend_purge,localhost,root,,) | ||
# Purge can cause deadlock in the test, requesting page's RW_X_LATCH for trx | ||
# ids reseting, after trx 2 acqured RW_S_LATCH and suspended in debug sync point | ||
# lock_trx_handle_wait_enter, waiting for upd_cont signal, which must be | ||
# emitted after the last SELECT in this test. The last SELECT will hang waiting | ||
# for purge RW_X_LATCH releasing, and trx 2 will be rolled back by timeout. | ||
START TRANSACTION WITH CONSISTENT SNAPSHOT; | ||
|
||
--connection default | ||
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; | ||
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; | ||
|
||
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); | ||
INSERT INTO t2 VALUES (10), (20), (30); | ||
|
||
BEGIN; # trx 1 | ||
# The following update is necessary to increase the transaction weight, which is | ||
# calculated as the number of locks + the number of undo records during deadlock | ||
# report. Victim's transaction should have minimum weight. We need trx 2 to be | ||
# choosen as victim, that's why we need to increase the current transaction | ||
# weight. | ||
UPDATE t2 SET a = a + 100; | ||
SELECT * FROM t WHERE a = 20 FOR UPDATE; | ||
|
||
--connect(con_2,localhost,root,,) | ||
# RC is necessary to do semi-consistent read | ||
SET TRANSACTION ISOLATION LEVEL READ COMMITTED; | ||
# It will be hit on trying to lock (20,20). | ||
SET DEBUG_SYNC = 'lock_trx_handle_wait_enter SIGNAL upd_locked WAIT_FOR upd_cont'; | ||
SET DEBUG_SYNC = 'trx_t_release_locks_enter SIGNAL sel_cont WAIT_FOR upd_cont_2'; | ||
BEGIN; # trx 2 | ||
# We must not modify primary key fields to cause rr_sequential() read record | ||
# function choosing in mysql_update(), i.e. both query_plan.using_filesort and | ||
# query_plan.using_io_buffer must be false during init_read_record() call. | ||
# The following UPDATE will be chosen as deadlock victim and rolled back. | ||
--send UPDATE t SET b = 100 | ||
|
||
--connection default | ||
SET DEBUG_SYNC="now WAIT_FOR upd_locked"; | ||
SET DEBUG_SYNC="deadlock_report_before_lock_releasing SIGNAL upd_cont WAIT_FOR sel_cont"; | ||
SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL sel_before_suspend"; | ||
# If the bug is not fixed, the following SELECT will crash, as the above UPDATE | ||
# will reset trx->lock.wait_thr during rollback | ||
--send SELECT * FROM t WHERE a = 10 FOR UPDATE; | ||
|
||
--connect(con_3,localhost,root,,) | ||
SET DEBUG_SYNC="now WAIT_FOR sel_before_suspend"; | ||
SET DEBUG_SYNC="now SIGNAL upd_cont_2"; | ||
--disconnect con_3 | ||
|
||
--connection con_2 | ||
--error ER_LOCK_DEADLOCK | ||
--reap | ||
--disconnect con_2 | ||
|
||
--connection default | ||
--reap | ||
SET DEBUG_SYNC = 'RESET'; | ||
DROP TABLE t; | ||
DROP TABLE t2; | ||
--disconnect suspend_purge | ||
--source include/wait_until_count_sessions.inc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters