Skip to content

Commit

Permalink
MDEV-29869 mtr failure: innodb.deadlock_wait_thr_race
Browse files Browse the repository at this point in the history
1. The merge aeccbbd overwrote lock0lock.cc, so the changes of
MDEV-29622 and MDEV-29635 were partially lost. This commit restores
those changes.

2. innodb.deadlock_wait_thr_race test:

The following hang was found during testing.

There is a deadlock_report_before_lock_releasing sync point in
Deadlock::report(), which waits for the sel_cont signal under the lock_sys_t
lock. The signal must be issued after the rollback of "UPDATE t SET b = 100",
and that rollback is executing an undo record, which is blocked on a
dict_sys latch request. dict_sys is latched by the statistics update
thread (dict_stats_save()), and during that update the lock_sys lock is
requested, which can't be acquired because Deadlock::report() holds it. We
have to disable statistics updates to make the test stable.

But even if statistics updates are disabled, and a transaction with a
consistent snapshot is started at the very beginning of the test to prevent
purging, purge can still be invoked for system tables. It tries to open a
system table by id, which causes a dict_sys.freeze() call and dict_sys
latching. This, in combination with lock_sys::xx_lock(), causes the same
deadlock as described above. We need to disable purging globally for the
test as well.

All of the above also applies to the innodb.deadlock_wait_lock_race test.
  • Loading branch information
vlad-lesin committed Oct 26, 2022
1 parent 5027cb2 commit 78a04a4
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 42 deletions.
2 changes: 0 additions & 2 deletions mysql-test/suite/innodb/disabled.def

This file was deleted.

8 changes: 2 additions & 6 deletions mysql-test/suite/innodb/r/deadlock_wait_lock_race.result
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
connect suspend_purge,localhost,root,,;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB;
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0;
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30);
BEGIN;
Expand All @@ -28,4 +25,3 @@ a b
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
DROP TABLE t2;
disconnect suspend_purge;
8 changes: 2 additions & 6 deletions mysql-test/suite/innodb/r/deadlock_wait_thr_race.result
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
connect suspend_purge,localhost,root,,;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB;
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0;
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30);
BEGIN;
Expand Down Expand Up @@ -34,4 +31,3 @@ a b
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
DROP TABLE t2;
disconnect suspend_purge;
1 change: 1 addition & 0 deletions mysql-test/suite/innodb/t/deadlock_wait_lock_race.opt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--innodb-force-recovery=2
17 changes: 11 additions & 6 deletions mysql-test/suite/innodb/t/deadlock_wait_lock_race.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,23 @@
--source include/have_debug_sync.inc
--source include/count_sessions.inc

--connect(suspend_purge,localhost,root,,)
# Purge can cause a deadlock in the test by requesting the page's RW_X_LATCH to
# reset trx ids after trx 2 has acquired RW_S_LATCH and been suspended at the
# debug sync point lock_trx_handle_wait_enter, waiting for the upd_cont signal,
# which must be emitted after the last SELECT in this test. The last SELECT
# will hang waiting for purge to release RW_X_LATCH, and trx 2 will be rolled
# back by timeout.
START TRANSACTION WITH CONSISTENT SNAPSHOT;

--connection default
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB;
# There is a deadlock_report_before_lock_releasing sync point in
# Deadlock::report(), which waits for the sel_cont signal under the
# lock_sys_t lock. The signal must be issued after the rollback of
# "UPDATE t SET b = 100", and that rollback is executing an undo record, which
# is blocked on a dict_sys latch request. dict_sys is latched by the statistics
# update thread (dict_stats_save()), and during that update the lock_sys lock
# is requested, which can't be acquired because Deadlock::report() holds it.
# We have to disable statistics updates to make the test stable.

CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0;

INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30);
Expand Down Expand Up @@ -58,5 +64,4 @@ SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL upd_cont";
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
DROP TABLE t2;
--disconnect suspend_purge
--source include/wait_until_count_sessions.inc
1 change: 1 addition & 0 deletions mysql-test/suite/innodb/t/deadlock_wait_thr_race.opt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--innodb-force-recovery=2
17 changes: 11 additions & 6 deletions mysql-test/suite/innodb/t/deadlock_wait_thr_race.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,23 @@
--source include/have_debug_sync.inc
--source include/count_sessions.inc

--connect(suspend_purge,localhost,root,,)
# Purge can cause a deadlock in the test by requesting the page's RW_X_LATCH to
# reset trx ids after trx 2 has acquired RW_S_LATCH and been suspended at the
# debug sync point lock_trx_handle_wait_enter, waiting for the upd_cont signal,
# which must be emitted after the last SELECT in this test. The last SELECT
# will hang waiting for purge to release RW_X_LATCH, and trx 2 will be rolled
# back by timeout.
START TRANSACTION WITH CONSISTENT SNAPSHOT;

--connection default
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB;
# There is a deadlock_report_before_lock_releasing sync point in
# Deadlock::report(), which waits for the sel_cont signal under the
# lock_sys_t lock. The signal must be issued after the rollback of
# "UPDATE t SET b = 100", and that rollback is executing an undo record, which
# is blocked on a dict_sys latch request. dict_sys is latched by the statistics
# update thread (dict_stats_save()), and during that update the lock_sys lock
# is requested, which can't be acquired because Deadlock::report() holds it.
# We have to disable statistics updates to make the test stable.

CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0;

INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30);
Expand Down Expand Up @@ -62,5 +68,4 @@ SET DEBUG_SYNC="now SIGNAL upd_cont_2";
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
DROP TABLE t2;
--disconnect suspend_purge
--source include/wait_until_count_sessions.inc
46 changes: 30 additions & 16 deletions storage/innobase/lock/lock0lock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1796,8 +1796,8 @@ dberr_t lock_wait(que_thr_t *thr)
wait_lock->un_member.tab_lock.table->id <= DICT_FIELDS_ID);
thd_wait_begin(trx->mysql_thd, (type_mode & LOCK_TABLE)
? THD_WAIT_TABLE_LOCK : THD_WAIT_ROW_LOCK);
trx->error_state= DB_SUCCESS;

int err= 0;
mysql_mutex_lock(&lock_sys.wait_mutex);
if (trx->lock.wait_lock)
{
Expand All @@ -1819,25 +1819,24 @@ dberr_t lock_wait(que_thr_t *thr)
if (row_lock_wait)
lock_sys.wait_start();

trx->error_state= DB_SUCCESS;

#ifdef HAVE_REPLICATION
if (rpl)
lock_wait_rpl_report(trx);
#endif

if (trx->error_state != DB_SUCCESS)
goto check_trx_error;

while (trx->lock.wait_lock)
{
int err;
DEBUG_SYNC_C("lock_wait_before_suspend");

if (no_timeout)
{
my_cond_wait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex);
err= 0;
}
else
err= my_cond_timedwait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex,
&abstime);
check_trx_error:
switch (trx->error_state) {
case DB_DEADLOCK:
case DB_INTERRUPTED:
Expand Down Expand Up @@ -1883,17 +1882,19 @@ dberr_t lock_wait(que_thr_t *thr)


/** Resume a lock wait */
static void lock_wait_end(trx_t *trx)
template <bool from_deadlock= false>
void lock_wait_end(trx_t *trx)
{
mysql_mutex_assert_owner(&lock_sys.wait_mutex);
ut_ad(trx->mutex_is_owner());
ut_d(const auto state= trx->state);
ut_ad(state == TRX_STATE_ACTIVE || state == TRX_STATE_PREPARED);
ut_ad(trx->lock.wait_thr);
ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY || state == TRX_STATE_ACTIVE ||
state == TRX_STATE_PREPARED);
ut_ad(from_deadlock || trx->lock.wait_thr);

if (trx->lock.was_chosen_as_deadlock_victim)
{
ut_ad(state == TRX_STATE_ACTIVE);
ut_ad(from_deadlock || state == TRX_STATE_ACTIVE);
trx->error_state= DB_DEADLOCK;
}

Expand Down Expand Up @@ -5705,13 +5706,16 @@ static void lock_release_autoinc_locks(trx_t *trx)
}

/** Cancel a waiting lock request and release possibly waiting transactions */
static void lock_cancel_waiting_and_release(lock_t *lock)
template <bool from_deadlock= false>
void lock_cancel_waiting_and_release(lock_t *lock)
{
lock_sys.assert_locked(*lock);
mysql_mutex_assert_owner(&lock_sys.wait_mutex);
trx_t *trx= lock->trx;
trx->mutex_lock();
ut_ad(trx->state == TRX_STATE_ACTIVE);
ut_d(const auto trx_state= trx->state);
ut_ad(trx_state == TRX_STATE_COMMITTED_IN_MEMORY ||
trx_state == TRX_STATE_ACTIVE);

if (!lock->is_table())
lock_rec_dequeue_from_page(lock, true);
Expand All @@ -5730,7 +5734,8 @@ static void lock_cancel_waiting_and_release(lock_t *lock)
/* Reset the wait flag and the back pointer to lock in trx. */
lock_reset_lock_and_trx_wait(lock);

lock_wait_end(trx);
lock_wait_end<from_deadlock>(trx);

trx->mutex_unlock();
}

Expand Down Expand Up @@ -5901,6 +5906,7 @@ lock_unlock_table_autoinc(

/** Handle a pending lock wait (DB_LOCK_WAIT) in a semi-consistent read
while holding a clustered index leaf page latch.
@param trx transaction that is or was waiting for a lock
@retval DB_SUCCESS if the lock was granted
@retval DB_DEADLOCK if the transaction must be aborted due to a deadlock
Expand All @@ -5911,8 +5917,13 @@ dberr_t lock_trx_handle_wait(trx_t *trx)
DEBUG_SYNC_C("lock_trx_handle_wait_enter");
if (trx->lock.was_chosen_as_deadlock_victim)
return DB_DEADLOCK;
DEBUG_SYNC_C("lock_trx_handle_wait_before_unlocked_wait_lock_check");
/* If the transaction was chosen as a deadlock victim,
trx->lock.was_chosen_as_deadlock_victim is always set before
trx->lock.wait_lock is reset, so this function must not return DB_SUCCESS
when trx->lock.was_chosen_as_deadlock_victim is set. */
if (!trx->lock.wait_lock)
return DB_SUCCESS;
return trx->lock.was_chosen_as_deadlock_victim ? DB_DEADLOCK : DB_SUCCESS;
dberr_t err= DB_SUCCESS;
mysql_mutex_lock(&lock_sys.wait_mutex);
if (trx->lock.was_chosen_as_deadlock_victim)
Expand Down Expand Up @@ -6315,8 +6326,11 @@ namespace Deadlock

ut_ad(victim->state == TRX_STATE_ACTIVE);

/* victim->lock.was_chosen_as_deadlock_victim must always be set before
releasing waiting locks and resetting trx->lock.wait_lock */
victim->lock.was_chosen_as_deadlock_victim= true;
lock_cancel_waiting_and_release(victim->lock.wait_lock);
DEBUG_SYNC_C("deadlock_report_before_lock_releasing");
lock_cancel_waiting_and_release<true>(victim->lock.wait_lock);
#ifdef WITH_WSREP
if (victim->is_wsrep() && wsrep_thd_is_SR(victim->mysql_thd))
wsrep_handle_SR_rollback(trx->mysql_thd, victim->mysql_thd);
Expand Down

0 comments on commit 78a04a4

Please sign in to comment.