From 9c04d66d11ba72addaa5adff64a0458d91c19c2c Mon Sep 17 00:00:00 2001 From: Vlad Lesin Date: Fri, 7 Oct 2022 23:25:58 +0300 Subject: [PATCH] MDEV-29622 Wrong assertions in lock_cancel_waiting_and_release() for deadlock resolving caller Suppose we have two transactions, trx 1 and trx 2. trx 2 does deadlock resolving from lock_wait(), it sets victim->lock.was_chosen_as_deadlock_victim=true for trx 1, but has not yet invoked lock_cancel_waiting_and_release(). trx 1 checks the flag in lock_trx_handle_wait(), and starts rollback from row_mysql_handle_errors(). It can change trx->lock.wait_thr and trx->state as it holds trx_t::mutex, but trx 2 has not yet requested it, as lock_cancel_waiting_and_release() has not yet been called. After that trx 1 tries to release locks in trx_t::rollback_low(), invoking trx_t::rollback_finish(). lock_release() is blocked on try to acquire lock_sys.rd_lock(SRW_LOCK_CALL) in lock_release_try(), as lock_sys is blocked by trx 2, as deadlock resolution works under lock_sys.wr_lock(SRW_LOCK_CALL), see Deadlock::report() for details. trx 2 executes lock_cancel_waiting_and_release() for deadlock victim, i. e. for trx 1. lock_cancel_waiting_and_release() contains some trx->lock.wait_thr and trx->state assertions, which will fail, because trx 1 has changed them during rollback execution. So, according to the above scenario, it's legal to have trx->lock.wait_thr==0 and trx->state!=TRX_STATE_ACTIVE in lock_cancel_waiting_and_release(), if it was invoked from Deadlock::report(), and the fix is just in the assertion conditions changing. The fix is just in changing assertion condition. There is also lock_wait() cleanup around trx->error_state. If trx->error_state can be changed not by the owned thread, it must be protected with lock_sys.wait_mutex, as lock_wait() uses trx->lock.cond along with that mutex. Also if trx->error_state was changed before lock_sys.wait_mutex acquision, then it could be reset with the following code, what is wrong. Also we need to check trx->error_state before entering waiting loop, otherwise it can be the case when trx->error_state was set before lock_sys.wait_mutex acquision, but the thread will be waiting on trx->lock.cond. --- .../innodb/r/deadlock_wait_thr_race.result | 37 +++++++++++ .../innodb/t/deadlock_wait_thr_race.test | 66 +++++++++++++++++++ storage/innobase/lock/lock0lock.cc | 35 +++++----- storage/innobase/trx/trx0trx.cc | 1 + 4 files changed, 124 insertions(+), 15 deletions(-) create mode 100644 mysql-test/suite/innodb/r/deadlock_wait_thr_race.result create mode 100644 mysql-test/suite/innodb/t/deadlock_wait_thr_race.test diff --git a/mysql-test/suite/innodb/r/deadlock_wait_thr_race.result b/mysql-test/suite/innodb/r/deadlock_wait_thr_race.result new file mode 100644 index 0000000000000..cea74b0b1cb07 --- /dev/null +++ b/mysql-test/suite/innodb/r/deadlock_wait_thr_race.result @@ -0,0 +1,37 @@ +connect suspend_purge,localhost,root,,; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +connection default; +CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; +CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; +INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); +INSERT INTO t2 VALUES (10), (20), (30); +BEGIN; +UPDATE t2 SET a = a + 100; +SELECT * FROM t WHERE a = 20 FOR UPDATE; +a b +20 20 +connect con_2,localhost,root,,; +SET TRANSACTION ISOLATION LEVEL READ COMMITTED; +SET DEBUG_SYNC = 'lock_trx_handle_wait_enter SIGNAL upd_locked WAIT_FOR upd_cont'; +SET DEBUG_SYNC = 'trx_t_release_locks_enter SIGNAL sel_cont WAIT_FOR upd_cont_2'; +BEGIN; +UPDATE t SET b = 100; +connection default; +SET DEBUG_SYNC="now WAIT_FOR upd_locked"; +SET DEBUG_SYNC="deadlock_report_before_lock_releasing SIGNAL upd_cont WAIT_FOR sel_cont"; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL sel_before_suspend"; +SELECT * FROM t WHERE a = 10 FOR UPDATE;; +connect con_3,localhost,root,,; +SET DEBUG_SYNC="now WAIT_FOR sel_before_suspend"; +SET DEBUG_SYNC="now SIGNAL upd_cont_2"; +disconnect con_3; +connection con_2; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +disconnect con_2; +connection default; +a b +10 10 +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t; +DROP TABLE t2; +disconnect suspend_purge; diff --git a/mysql-test/suite/innodb/t/deadlock_wait_thr_race.test b/mysql-test/suite/innodb/t/deadlock_wait_thr_race.test new file mode 100644 index 0000000000000..2027b45cbae78 --- /dev/null +++ b/mysql-test/suite/innodb/t/deadlock_wait_thr_race.test @@ -0,0 +1,66 @@ +--source include/have_innodb.inc +--source include/have_debug_sync.inc +--source include/count_sessions.inc + +--connect(suspend_purge,localhost,root,,) +# Purge can cause deadlock in the test, requesting page's RW_X_LATCH for trx +# ids reseting, after trx 2 acqured RW_S_LATCH and suspended in debug sync point +# lock_trx_handle_wait_enter, waiting for upd_cont signal, which must be +# emitted after the last SELECT in this test. The last SELECT will hang waiting +# for purge RW_X_LATCH releasing, and trx 2 will be rolled back by timeout. +START TRANSACTION WITH CONSISTENT SNAPSHOT; + +--connection default +CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; +CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; + +INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); +INSERT INTO t2 VALUES (10), (20), (30); + +BEGIN; # trx 1 +# The following update is necessary to increase the transaction weight, which is +# calculated as the number of locks + the number of undo records during deadlock +# report. Victim's transaction should have minimum weight. We need trx 2 to be +# choosen as victim, that's why we need to increase the current transaction +# weight. +UPDATE t2 SET a = a + 100; +SELECT * FROM t WHERE a = 20 FOR UPDATE; + +--connect(con_2,localhost,root,,) +# RC is necessary to do semi-consistent read +SET TRANSACTION ISOLATION LEVEL READ COMMITTED; +# It will be hit on trying to lock (20,20). +SET DEBUG_SYNC = 'lock_trx_handle_wait_enter SIGNAL upd_locked WAIT_FOR upd_cont'; +SET DEBUG_SYNC = 'trx_t_release_locks_enter SIGNAL sel_cont WAIT_FOR upd_cont_2'; +BEGIN; # trx 2 +# We must not modify primary key fields to cause rr_sequential() read record +# function choosing in mysql_update(), i.e. both query_plan.using_filesort and +# query_plan.using_io_buffer must be false during init_read_record() call. +# The following UPDATE will be chosen as deadlock victim and rolled back. +--send UPDATE t SET b = 100 + +--connection default +SET DEBUG_SYNC="now WAIT_FOR upd_locked"; +SET DEBUG_SYNC="deadlock_report_before_lock_releasing SIGNAL upd_cont WAIT_FOR sel_cont"; +SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL sel_before_suspend"; +# If the bug is not fixed, the following SELECT will crash, as the above UPDATE +# will reset trx->lock.wait_thr during rollback +--send SELECT * FROM t WHERE a = 10 FOR UPDATE; + +--connect(con_3,localhost,root,,) +SET DEBUG_SYNC="now WAIT_FOR sel_before_suspend"; +SET DEBUG_SYNC="now SIGNAL upd_cont_2"; +--disconnect con_3 + +--connection con_2 +--error ER_LOCK_DEADLOCK +--reap +--disconnect con_2 + +--connection default +--reap +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t; +DROP TABLE t2; +--disconnect suspend_purge +--source include/wait_until_count_sessions.inc diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index f4d9ee6ceda3c..484d04fdc1495 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1789,8 +1789,8 @@ dberr_t lock_wait(que_thr_t *thr) wait_lock->un_member.tab_lock.table->id <= DICT_FIELDS_ID); thd_wait_begin(trx->mysql_thd, (type_mode & LOCK_TABLE) ? THD_WAIT_TABLE_LOCK : THD_WAIT_ROW_LOCK); - trx->error_state= DB_SUCCESS; + int err= 0; mysql_mutex_lock(&lock_sys.wait_mutex); if (trx->lock.wait_lock) { @@ -1812,26 +1812,24 @@ dberr_t lock_wait(que_thr_t *thr) if (row_lock_wait) lock_sys.wait_start(); - trx->error_state= DB_SUCCESS; - #ifdef HAVE_REPLICATION if (rpl) lock_wait_rpl_report(trx); #endif + if (trx->error_state != DB_SUCCESS) + goto check_trx_error; + while (trx->lock.wait_lock) { - int err; DEBUG_SYNC_C("lock_wait_before_suspend"); if (no_timeout) - { my_cond_wait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex); - err= 0; - } else err= my_cond_timedwait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex, &abstime); +check_trx_error: switch (trx->error_state) { case DB_DEADLOCK: case DB_INTERRUPTED: @@ -1877,17 +1875,19 @@ dberr_t lock_wait(que_thr_t *thr) /** Resume a lock wait */ -static void lock_wait_end(trx_t *trx) +template +void lock_wait_end(trx_t *trx) { mysql_mutex_assert_owner(&lock_sys.wait_mutex); ut_ad(trx->mutex_is_owner()); ut_d(const auto state= trx->state); - ut_ad(state == TRX_STATE_ACTIVE || state == TRX_STATE_PREPARED); - ut_ad(trx->lock.wait_thr); + ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY || state == TRX_STATE_ACTIVE || + state == TRX_STATE_PREPARED); + ut_ad(from_deadlock || trx->lock.wait_thr); if (trx->lock.was_chosen_as_deadlock_victim) { - ut_ad(state == TRX_STATE_ACTIVE); + ut_ad(from_deadlock || state == TRX_STATE_ACTIVE); trx->error_state= DB_DEADLOCK; } @@ -5674,13 +5674,16 @@ static void lock_release_autoinc_locks(trx_t *trx) } /** Cancel a waiting lock request and release possibly waiting transactions */ -static void lock_cancel_waiting_and_release(lock_t *lock) +template +void lock_cancel_waiting_and_release(lock_t *lock) { lock_sys.assert_locked(*lock); mysql_mutex_assert_owner(&lock_sys.wait_mutex); trx_t *trx= lock->trx; trx->mutex_lock(); - ut_ad(trx->state == TRX_STATE_ACTIVE); + ut_d(const auto trx_state= trx->state); + ut_ad(trx_state == TRX_STATE_COMMITTED_IN_MEMORY || + trx_state == TRX_STATE_ACTIVE); if (!lock->is_table()) lock_rec_dequeue_from_page(lock, true); @@ -5699,7 +5702,8 @@ static void lock_cancel_waiting_and_release(lock_t *lock) /* Reset the wait flag and the back pointer to lock in trx. */ lock_reset_lock_and_trx_wait(lock); - lock_wait_end(trx); + lock_wait_end(trx); + trx->mutex_unlock(); } @@ -6293,7 +6297,8 @@ namespace Deadlock /* victim->lock.was_chosen_as_deadlock_victim must always be set before releasing waiting locks and reseting trx->lock.wait_lock */ victim->lock.was_chosen_as_deadlock_victim= true; - lock_cancel_waiting_and_release(victim->lock.wait_lock); + DEBUG_SYNC_C("deadlock_report_before_lock_releasing"); + lock_cancel_waiting_and_release(victim->lock.wait_lock); #ifdef WITH_WSREP if (victim->is_wsrep() && wsrep_thd_is_SR(victim->mysql_thd)) wsrep_handle_SR_rollback(trx->mysql_thd, victim->mysql_thd); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index f9fe2c19fe154..a429ea9b7fb11 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -485,6 +485,7 @@ TRANSACTIONAL_INLINE inline void trx_t::commit_state() /** Release any explicit locks of a committing transaction. */ inline void trx_t::release_locks() { + DEBUG_SYNC_C("trx_t_release_locks_enter"); DBUG_ASSERT(state == TRX_STATE_COMMITTED_IN_MEMORY); DBUG_ASSERT(!is_referenced());