Skip to content

Commit 7bffe46

Browse files
sjaakola authored and Jan Lindström committed
MDEV-21910 Deadlock between BF abort and manual KILL command
When a high priority replication slave applier encounters a lock conflict in InnoDB, it will force the conflicting lock holder transaction (victim) to roll back. This is a must in the multi-master synchronous replication model to avoid cluster lock-up. This high priority victim abort (aka "brute force" (BF) abort) is started from the InnoDB lock manager while holding the victim transaction's (trx) mutex. Depending on the execution state of the victim transaction, it may happen that the BF abort will call THD::awake() to wake up the victim transaction for the rollback. Now, if BF abort requires THD::awake() to be called, then the applier thread executes the locking protocol: victim trx mutex -> victim THD::LOCK_thd_data. If, at the same time, another DBMS super user issues a KILL command to abort the same victim, it will execute the locking protocol: victim THD::LOCK_thd_data -> victim trx mutex. These two locking protocols acquire the mutexes in opposite order, hence an unresolvable mutex locking deadlock may occur. The fix in this commit adds the THD::wsrep_aborter flag to synchronize who can kill the victim. This flag is set both when BF abort is called for from InnoDB and by the KILL command. Either path of victim killing will bail out if the victim's wsrep_aborter is already set, to avoid mutex conflicts with the other aborter's execution. THD::wsrep_aborter records the aborter THD's ID. This is needed to preserve the right to kill the victim from different locations for the same aborter thread. It is also good for error logging, to see who is responsible for the abort. A new test case was added in galera.galera_bf_kill_debug.test for the scenario where the wsrep applier thread and a manual KILL command try to kill the same idle victim.
1 parent 4ec032b commit 7bffe46

16 files changed

+441
-216
lines changed

include/mysql/service_wsrep.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ extern struct wsrep_service_st {
8787
ulong (*wsrep_OSU_method_get_func)(const MYSQL_THD thd);
8888
my_bool (*wsrep_thd_has_ignored_error_func)(const MYSQL_THD thd);
8989
void (*wsrep_thd_set_ignored_error_func)(MYSQL_THD thd, my_bool val);
90+
bool (*wsrep_thd_set_wsrep_aborter_func)(MYSQL_THD bf_thd, MYSQL_THD thd);
9091
} *wsrep_service;
9192

9293
#define MYSQL_SERVICE_WSREP_INCLUDED
@@ -130,6 +131,7 @@ extern struct wsrep_service_st {
130131
#define wsrep_OSU_method_get(T) wsrep_service->wsrep_OSU_method_get_func(T)
131132
#define wsrep_thd_has_ignored_error(T) wsrep_service->wsrep_thd_has_ignored_error_func(T)
132133
#define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V)
134+
/* Set wsrep_aborter for the target THD: T1 is the aborting (BF) thd, T2 the victim thd. */
#define wsrep_thd_set_wsrep_aborter(T1, T2) wsrep_service->wsrep_thd_set_wsrep_aborter_func(T1, T2)
133135
#else
134136

135137
#define MYSQL_SERVICE_WSREP_STATIC_INCLUDED
@@ -181,6 +183,8 @@ extern "C" my_bool wsrep_thd_is_local(const MYSQL_THD thd);
181183
/* Return true if thd is in high priority mode */
182184
/* todo: rename to is_high_priority() */
183185
extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
186+
/* set wsrep_aborter for the target THD */
187+
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);
184188
/* Return true if thd is in TOI mode */
185189
extern "C" my_bool wsrep_thd_is_toi(const MYSQL_THD thd);
186190
/* Return true if thd is in replicating TOI mode */
@@ -224,5 +228,6 @@ extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
224228
extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd);
225229
extern "C" my_bool wsrep_thd_has_ignored_error(const MYSQL_THD thd);
226230
extern "C" void wsrep_thd_set_ignored_error(MYSQL_THD thd, my_bool val);
231+
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);
227232
#endif
228233
#endif /* MYSQL_SERVICE_WSREP_INCLUDED */

mysql-test/suite/galera/r/galera_bf_kill.result

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -69,21 +69,5 @@ select * from t1;
6969
a b
7070
2 1
7171
disconnect node_2a;
72-
drop table t1;
73-
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
74-
connection node_2a;
75-
CREATE TABLE t1 (i int primary key);
76-
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
77-
INSERT INTO t1 VALUES (1);
78-
connection node_2;
79-
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
80-
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
81-
SET DEBUG_SYNC='RESET';
82-
connection node_2a;
83-
connection node_2;
84-
select * from t1;
85-
i
86-
1
87-
disconnect node_2a;
88-
connection node_2;
72+
connection node_1;
8973
drop table t1;
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
connection node_2;
2+
connection node_1;
3+
connection node_2;
4+
CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
5+
insert into t1 values (NULL,1);
6+
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
7+
connection node_2a;
8+
truncate t1;
9+
insert into t1 values (1,0);
10+
begin;
11+
update t1 set b=2 where a=1;
12+
connection node_2;
13+
set session wsrep_sync_wait=0;
14+
connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
15+
connection node_2b;
16+
SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
17+
connection node_1;
18+
select * from t1;
19+
a b
20+
1 0
21+
update t1 set b= 1 where a=1;
22+
connection node_2b;
23+
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
24+
connection node_2;
25+
SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
26+
connection node_2b;
27+
SET DEBUG_SYNC='now WAIT_FOR awake_reached';
28+
SET GLOBAL debug_dbug = "";
29+
SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
30+
SET DEBUG_SYNC = "now SIGNAL continue_kill";
31+
connection node_2;
32+
connection node_2a;
33+
select * from t1;
34+
connection node_2;
35+
SET DEBUG_SYNC = "RESET";
36+
drop table t1;
37+
disconnect node_2a;
38+
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
39+
connection node_2a;
40+
CREATE TABLE t1 (i int primary key);
41+
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
42+
INSERT INTO t1 VALUES (1);
43+
connection node_2;
44+
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
45+
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
46+
SET DEBUG_SYNC='RESET';
47+
connection node_2a;
48+
connection node_2;
49+
select * from t1;
50+
i
51+
1
52+
disconnect node_2a;
53+
connection node_1;
54+
drop table t1;

mysql-test/suite/galera/r/galera_bf_lock_wait.result

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
connection node_2;
22
connection node_1;
3+
connection node_2;
4+
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
35
connection node_1;
46
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
57
CREATE TABLE t1 ENGINE=InnoDB select 1 as a, 1 as b union select 2, 2;

mysql-test/suite/galera/t/galera_bf_kill.test

Lines changed: 1 addition & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
--source include/galera_cluster.inc
22
--source include/have_innodb.inc
3-
--source include/have_debug.inc
4-
--source include/have_debug_sync.inc
53

64
#
75
# Test case 1: Start a transaction on node_2a and kill it
@@ -135,56 +133,9 @@ update t1 set a =5, b=2;
135133
--eval KILL $k_thread
136134
--enable_query_log
137135

138-
139136
select * from t1;
140137

141138
--disconnect node_2a
142139

140+
--connection node_1
143141
drop table t1;
144-
145-
146-
#
147-
# Test case 7:
148-
# run a transaction in node 2, and set a sync point to pause the transaction
149-
# in commit phase.
150-
# Through another connection to node 2, kill the committing transaction by
151-
# KILL QUERY command
152-
#
153-
154-
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
155-
--connection node_2a
156-
--let $connection_id = `SELECT CONNECTION_ID()`
157-
158-
CREATE TABLE t1 (i int primary key);
159-
160-
# Set up sync point
161-
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
162-
163-
# Send insert which will block in the sync point above
164-
--send INSERT INTO t1 VALUES (1)
165-
166-
--connection node_2
167-
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
168-
169-
--disable_query_log
170-
--disable_result_log
171-
# victim has passed the point of no return, kill is not possible anymore
172-
--eval KILL QUERY $connection_id
173-
--enable_result_log
174-
--enable_query_log
175-
176-
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
177-
SET DEBUG_SYNC='RESET';
178-
--connection node_2a
179-
--error 0,1213
180-
--reap
181-
182-
--connection node_2
183-
# victim was able to complete the INSERT
184-
select * from t1;
185-
186-
--disconnect node_2a
187-
188-
--connection node_2
189-
drop table t1;
190-
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
!include ../galera_2nodes.cnf
2+
3+
[mysqld.1]
4+
wsrep-debug=SERVER
5+
6+
[mysqld.2]
7+
wsrep-debug=SERVER
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
--source include/galera_cluster.inc
2+
--source include/have_innodb.inc
3+
--source include/have_debug.inc
4+
--source include/have_debug_sync.inc
5+
6+
#
7+
# Test case 7:
8+
# 1. Start a transaction on node_2,
9+
# and leave it pending while holding a row locked
10+
# 2. set a sync point to pause the applier
11+
# 3. send a conflicting write on node_1, it will pause
12+
# at the sync point
13+
# 4. through another connection to node_2, kill the local
14+
# transaction
15+
#
16+
17+
--connection node_2
18+
CREATE TABLE t1(a int not null primary key auto_increment,b int) engine=InnoDB;
19+
insert into t1 values (NULL,1);
20+
21+
#
22+
# connection node_2a runs a local transaction, that is victim of BF abort
23+
# and victim of KILL command by connection node_2
24+
#
25+
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
26+
--connection node_2a
27+
truncate t1;
28+
insert into t1 values (1,0);
29+
30+
# start a transaction that will conflict with later applier
31+
begin;
32+
update t1 set b=2 where a=1;
33+
34+
--connection node_2
35+
set session wsrep_sync_wait=0;
36+
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1
37+
--source include/wait_condition.inc
38+
39+
--let $k_thread = `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE USER = 'root' AND COMMAND = 'Sleep' LIMIT 1`
40+
41+
# connection node_2b is for controlling debug sync points
42+
# first set a sync point for applier, to pause during BF aborting
43+
# and before THD::awake would be called
44+
#
45+
--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
46+
--connection node_2b
47+
SET GLOBAL debug_dbug = "d,sync.before_wsrep_thd_abort";
48+
49+
#
50+
# replicate an update, which will BF abort the victim node_2a
51+
# however, while applier in node 2 is handling the abort,
52+
# it will pause in sync point set by node_2b
53+
#
54+
--connection node_1
55+
select * from t1;
56+
update t1 set b= 1 where a=1;
57+
58+
#
59+
# wait until the applying of above update has reached the sync point
60+
# in node 2
61+
#
62+
--connection node_2b
63+
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.before_wsrep_thd_abort_reached";
64+
65+
--connection node_2
66+
#
67+
# pause KILL execution before awake
68+
#
69+
SET DEBUG_SYNC= 'before_awake_no_mutex SIGNAL awake_reached WAIT_FOR continue_kill';
70+
--disable_query_log
71+
--send_eval KILL $k_thread
72+
--enable_query_log
73+
74+
75+
--connection node_2b
76+
SET DEBUG_SYNC='now WAIT_FOR awake_reached';
77+
78+
# release applier and KILL operator
79+
SET GLOBAL debug_dbug = "";
80+
SET DEBUG_SYNC = "now SIGNAL signal.before_wsrep_thd_abort";
81+
SET DEBUG_SYNC = "now SIGNAL continue_kill";
82+
83+
--connection node_2
84+
--reap
85+
86+
--connection node_2a
87+
--error 0,1213
88+
select * from t1;
89+
90+
--connection node_2
91+
SET DEBUG_SYNC = "RESET";
92+
93+
drop table t1;
94+
95+
--disconnect node_2a
96+
#
97+
# Test case 8:
98+
# run a transaction in node 2, and set a sync point to pause the transaction
99+
# in commit phase.
100+
# Through another connection to node 2, kill the committing transaction by
101+
# KILL QUERY command
102+
#
103+
104+
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
105+
--connection node_2a
106+
--let $connection_id = `SELECT CONNECTION_ID()`
107+
108+
CREATE TABLE t1 (i int primary key);
109+
110+
# Set up sync point
111+
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
112+
113+
# Send insert which will block in the sync point above
114+
--send INSERT INTO t1 VALUES (1)
115+
116+
--connection node_2
117+
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
118+
119+
--disable_query_log
120+
--disable_result_log
121+
# victim has passed the point of no return, kill is not possible anymore
122+
--eval KILL QUERY $connection_id
123+
--enable_result_log
124+
--enable_query_log
125+
126+
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
127+
SET DEBUG_SYNC='RESET';
128+
--connection node_2a
129+
--error 0,1213
130+
--reap
131+
132+
--connection node_2
133+
# victim was able to complete the INSERT
134+
select * from t1;
135+
136+
--disconnect node_2a
137+
138+
--connection node_1
139+
drop table t1;
140+

mysql-test/suite/galera/t/galera_bf_lock_wait.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
--source include/have_innodb.inc
33
--source include/big_test.inc
44

5-
--connection node_1
5+
--connection node_2
6+
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
67

8+
--connection node_1
79
call mtr.add_suppression("WSREP: Trying to continue unpaused monitor");
810

911
CREATE TABLE t1 ENGINE=InnoDB select 1 as a, 1 as b union select 2, 2;

0 commit comments

Comments
 (0)