Skip to content

Commit

Permalink
MDEV-21910 Deadlock between BF abort and manual KILL command
Browse files Browse the repository at this point in the history
When high priority replication slave applier encounters lock conflict in innodb,
it will force the conflicting lock holder transaction (victim) to rollback.
This is a must in multi-master sychronous replication model to avoid cluster lock-up.
This high priority victim abort (aka "brute force" (BF) abort), is started
from innodb lock manager while holding the victim's transaction's (trx) mutex.
Depending on the execution state of the victim transaction, it may happen that the
BF abort will call for THD::awake() to wake up the victim transaction for the rollback.
Now, if BF abort requires THD::awake() to be called, then the applier thread executed
locking protocol of: victim trx mutex -> victim THD::LOCK_thd_data

If, at the same time another DBMS super user issues KILL command to abort the same victim,
it will execute locking protocol of: victim THD::LOCK_thd_data  -> victim trx mutex.
These two locking protocol acquire mutexes in opposite order, hence unresolvable mutex locking
deadlock may occur.

The fix in this commit adds THD::wsrep_aborter flag to synchronize who can kill the victim
This flag is set both when BF is called for from innodb and by KILL command.
Either path of victim killing will bail out if victim's wsrep_killed is already
set to avoid mutex conflicts with the other aborter execution. THD::wsrep_aborter
records the aborter THD's ID. This is needed to preserve the right to kill
the victim from different locations for the same aborter thread.
It is also good error logging, to see who is reponsible for the abort.

A new test case was added in galera.galera_bf_kill_debug.test for scenario where
wsrep applier thread and manual KILL command try to kill same idle victim
  • Loading branch information
sjaakola authored and Jan Lindström committed Jun 26, 2020
1 parent 141b390 commit 5a7794d
Show file tree
Hide file tree
Showing 15 changed files with 290 additions and 73 deletions.
5 changes: 5 additions & 0 deletions include/mysql/service_wsrep.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ extern struct wsrep_service_st {
my_bool (*wsrep_get_debug_func)();
void (*wsrep_commit_ordered_func)(MYSQL_THD thd);
my_bool (*wsrep_thd_is_applying_func)(const MYSQL_THD thd);
bool (*wsrep_thd_set_wsrep_aborter_func)(MYSQL_THD bf_thd, MYSQL_THD thd);
} *wsrep_service;

#define MYSQL_SERVICE_WSREP_INCLUDED
Expand Down Expand Up @@ -124,6 +125,7 @@ extern struct wsrep_service_st {
#define wsrep_get_debug() wsrep_service->wsrep_get_debug_func()
#define wsrep_commit_ordered(T) wsrep_service->wsrep_commit_ordered_func(T)
#define wsrep_thd_is_applying(T) wsrep_service->wsrep_thd_is_applying_func(T)
#define wsrep_thd_set_wsrep_aborter(T) wsrep_service->wsrep_thd_set_wsrep_aborter_func(T1, T2)

#else

Expand Down Expand Up @@ -176,6 +178,8 @@ extern "C" my_bool wsrep_thd_is_local(const MYSQL_THD thd);
/* Return true if thd is in high priority mode */
/* todo: rename to is_high_priority() */
extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
/* set wsrep_aborter for the target THD */
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);
/* Return true if thd is in TOI mode */
extern "C" my_bool wsrep_thd_is_toi(const MYSQL_THD thd);
/* Return true if thd is in replicating TOI mode */
Expand Down Expand Up @@ -216,6 +220,7 @@ extern "C" my_bool wsrep_get_debug();

extern "C" void wsrep_commit_ordered(MYSQL_THD thd);
extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);

#endif
#endif /* MYSQL_SERVICE_WSREP_INCLUDED */
18 changes: 1 addition & 17 deletions mysql-test/suite/galera/r/galera_bf_kill.result
Original file line number Diff line number Diff line change
Expand Up @@ -69,21 +69,5 @@ select * from t1;
a b
2 1
disconnect node_2a;
drop table t1;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
CREATE TABLE t1 (i int primary key);
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
INSERT INTO t1 VALUES (1);
connection node_2;
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
connection node_2a;
connection node_2;
select * from t1;
i
1
disconnect node_2a;
connection node_2;
connection node_1;
drop table t1;
54 changes: 54 additions & 0 deletions mysql-test/suite/galera/r/galera_bf_kill_debug.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
connection node_2;
connection node_1;
connection node_2;
CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
insert into t1 values (NULL,1);
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
truncate t1;
insert into t1 values (1,0);
begin;
update t1 set b=2 where a=1;
connection node_2;
set session wsrep_sync_wait=0;
connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2b;
SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
connection node_1;
select * from t1;
a b
1 0
update t1 set b= 1 where a=1;
connection node_2b;
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
connection node_2;
SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
connection node_2b;
SET DEBUG_SYNC='now WAIT_FOR awake_reached';
SET GLOBAL debug_dbug = "";
SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
SET DEBUG_SYNC = "now SIGNAL continue_kill";
connection node_2;
connection node_2a;
select * from t1;
connection node_2;
SET DEBUG_SYNC = "RESET";
drop table t1;
disconnect node_2a;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
CREATE TABLE t1 (i int primary key);
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
INSERT INTO t1 VALUES (1);
connection node_2;
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
connection node_2a;
connection node_2;
select * from t1;
i
1
disconnect node_2a;
connection node_1;
drop table t1;
2 changes: 2 additions & 0 deletions mysql-test/suite/galera/r/galera_bf_lock_wait.result
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
connection node_2;
connection node_1;
connection node_2;
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
connection node_1;
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
CREATE TABLE t1 ENGINE=InnoDB select 1 as a, 1 as b union select 2, 2;
Expand Down
51 changes: 1 addition & 50 deletions mysql-test/suite/galera/t/galera_bf_kill.test
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc

#
# Test case 1: Start a transaction on node_2a and kill it
Expand Down Expand Up @@ -135,56 +133,9 @@ update t1 set a =5, b=2;
--eval KILL $k_thread
--enable_query_log


select * from t1;

--disconnect node_2a

--connection node_1
drop table t1;


#
# Test case 7:
# run a transaction in node 2, and set a sync point to pause the transaction
# in commit phase.
# Through another connection to node 2, kill the committing transaction by
# KILL QUERY command
#

--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
--let $connection_id = `SELECT CONNECTION_ID()`

CREATE TABLE t1 (i int primary key);

# Set up sync point
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";

# Send insert which will block in the sync point above
--send INSERT INTO t1 VALUES (1)

--connection node_2
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";

--disable_query_log
--disable_result_log
# victim has passed the point of no return, kill is not possible anymore
--eval KILL QUERY $connection_id
--enable_result_log
--enable_query_log

SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
--connection node_2a
--error 0,1213
--reap

--connection node_2
# victim was able to complete the INSERT
select * from t1;

--disconnect node_2a

--connection node_2
drop table t1;

7 changes: 7 additions & 0 deletions mysql-test/suite/galera/t/galera_bf_kill_debug.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
!include ../galera_2nodes.cnf

[mysqld.1]
wsrep-debug=SERVER

[mysqld.2]
wsrep-debug=SERVER
140 changes: 140 additions & 0 deletions mysql-test/suite/galera/t/galera_bf_kill_debug.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc

#
# Test case 7:
# 1. Start a transaction on node_2,
# and leave it pending while holding a row locked
# 2. set sync point pause applier
# 3. send a conflicting write on node_1, it will pause
# at the sync point
# 4. though another connection to node_2, kill the local
# transaction
#

--connection node_2
CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
insert into t1 values (NULL,1);

#
# connection node_2a runs a local transaction, that is victim of BF abort
# and victim of KILL command by connection node_2
#
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
truncate t1;
insert into t1 values (1,0);

# start a transaction that will conflict with later applier
begin;
update t1 set b=2 where a=1;

--connection node_2
set session wsrep_sync_wait=0;
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1
--source include/wait_condition.inc

--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`

# connection node_2b is for controlling debug syn points
# first set a sync point for applier, to pause during BF aborting
# and before THD::awake would be called
#
--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2b
SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";

#
# replicate an update, which will BF abort the victim node_2a
# however, while applier in node 2 is handling the abort,
# it will pause in sync point set by node_2b
#
--connection node_1
select * from t1;
update t1 set b= 1 where a=1;

#
# wait until the applying of above update has reached the sync point
# in node 2
#
--connection node_2b
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";

--connection node_2
#
# pause KILL execution before awake
#
SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
--disable_query_log
--send_eval KILL $k_thread
--enable_query_log


--connection node_2b
SET DEBUG_SYNC='now WAIT_FOR awake_reached';

# release applier and KILL operator
SET GLOBAL debug_dbug = "";
SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
SET DEBUG_SYNC = "now SIGNAL continue_kill";

--connection node_2
--reap

--connection node_2a
--error 0,1213
select * from t1;

--connection node_2
SET DEBUG_SYNC = "RESET";

drop table t1;

--disconnect node_2a
#
# Test case 7:
# run a transaction in node 2, and set a sync point to pause the transaction
# in commit phase.
# Through another connection to node 2, kill the committing transaction by
# KILL QUERY command
#

--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
--let $connection_id = `SELECT CONNECTION_ID()`

CREATE TABLE t1 (i int primary key);

# Set up sync point
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";

# Send insert which will block in the sync point above
--send INSERT INTO t1 VALUES (1)

--connection node_2
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";

--disable_query_log
--disable_result_log
# victim has passed the point of no return, kill is not possible anymore
--eval KILL QUERY $connection_id
--enable_result_log
--enable_query_log

SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
--connection node_2a
--error 0,1213
--reap

--connection node_2
# victim was able to complete the INSERT
select * from t1;

--disconnect node_2a

--connection node_1
drop table t1;

4 changes: 3 additions & 1 deletion mysql-test/suite/galera/t/galera_bf_lock_wait.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
--source include/have_innodb.inc
--source include/big_test.inc

--connection node_1
--connection node_2
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");

--connection node_1
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");

CREATE TABLE t1 ENGINE=InnoDB select 1 as a, 1 as b union select 2, 2;
Expand Down
Loading

0 comments on commit 5a7794d

Please sign in to comment.