Skip to content

Commit

Permalink
MDEV-25114 Crash: WSREP: invalid state ROLLED_BACK (FATAL)
Browse files Browse the repository at this point in the history
This patch is the plan D variant for fixing potential mutex locking
order problems exercised by BF aborting and KILL command execution.

In this approach, the KILL command is replicated as a TOI operation.
This guarantees total isolation for the KILL command execution
in the first node: there is no concurrent replication applying
and no concurrent DDL executing. Therefore there is no risk of
BF aborting happening in parallel with KILL command execution
either. Potential mutex deadlocks between the different mutex
access paths of KILL command execution and BF aborting therefore
cannot happen.

TOI replication is used, in this approach, purely as a means
to provide isolated KILL command execution in the first node.
The KILL command should not (and must not) be applied in secondary
nodes. In this patch, we ensure this by skipping KILL
execution in secondary nodes, in the applying phase, where we
bail out if an applier thread is trying to execute a KILL command.
This is effective, but skipping the applying of the KILL command
could happen much earlier as well.

This patch also fixes mutex locking order and unprotected
THD member accesses in the BF aborting case. We try to hold
THD::LOCK_thd_data during BF aborting. The only case where this
is not possible is in wsrep_abort_transaction, before the
call to wsrep_innobase_kill_one_trx, where we take InnoDB
mutexes first and then THD::LOCK_thd_data.

This will also fix a possible race condition during
close_connection and while wsrep is disconnecting
connections.

Added galera_bf_kill_debug test case.

Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
  • Loading branch information
sjaakola authored and Jan Lindström committed Sep 24, 2021
1 parent 9d97f92 commit 88a4be7
Show file tree
Hide file tree
Showing 18 changed files with 725 additions and 151 deletions.
7 changes: 7 additions & 0 deletions mysql-test/suite/galera/r/galera_UK_conflict.result
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ f1 f2 f3
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
SELECT COUNT(*) FROM t1;
COUNT(*)
7
SELECT * FROM t1;
f1 f2 f3
1 1 0
Expand All @@ -78,12 +81,16 @@ f1 f2 f3
8 8 8
10 10 0
connection node_1;
SELECT COUNT(*) FROM t1;
COUNT(*)
7
SELECT * FROM t1;
f1 f2 f3
1 1 0
3 3 1
4 4 2
5 5 2
7 7 7
8 8 8
10 10 0
DROP TABLE t1;
163 changes: 163 additions & 0 deletions mysql-test/suite/galera/r/galera_bf_kill_debug.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#
# Case 1: We execute bf kill to wsrep_innobase_kill_one_trx
# function just before wsrep_thd_LOCK(thd) call. Then we
# try to kill victim transaction by KILL QUERY
#
CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
begin;
update t1 set b = b * 10 where id between 2 and 4;
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
SET DEBUG_SYNC='wsrep_before_BF_victim_lock SIGNAL bf_kill WAIT_FOR bf_continue';
ALTER TABLE t1 ADD UNIQUE KEY b1(b);;
connection node_1;
SET DEBUG_SYNC='now WAIT_FOR bf_kill';
connection node_1b;
Table Create Table
t1 CREATE TABLE `t1` (
`id` int(11) NOT NULL,
`b` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `b1` (`b`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
id b
1 1
2 2
3 3
4 4
5 5
connection node_1;
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;
disconnect node_1a;
disconnect node_1b;
disconnect node_1c;
#
# Case 2: We execute bf kill to wsrep_innobase_kill_one_trx
# function just after wsrep_thd_LOCK(thd) call. Then we
# try to kill victim transaction by KILL QUERY
#
CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
begin;
update t1 set b = b * 10 where id between 2 and 4;
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
SET DEBUG_SYNC='wsrep_after_BF_victim_lock SIGNAL bf_kill WAIT_FOR bf_continue';
ALTER TABLE t1 ADD UNIQUE KEY b1(b);;
connection node_1;
SET DEBUG_SYNC='now WAIT_FOR bf_kill';
connection node_1b;
Table Create Table
t1 CREATE TABLE `t1` (
`id` int(11) NOT NULL,
`b` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `b1` (`b`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
id b
1 1
2 2
3 3
4 4
5 5
connection node_1;
SET DEBUG_SYNC= 'RESET';
DROP TABLE t1;
disconnect node_1a;
disconnect node_1b;
disconnect node_1c;
#
# Case 3: Create victim transaction and try to send user KILL
# from several threads
#
CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
begin;
update t1 set b = b * 10 where id between 2 and 4;
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connect node_1d, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connection node_1b;
connection node_1c;
connection node_1d;
connection node_1;
disconnect node_1a;
disconnect node_1b;
disconnect node_1c;
disconnect node_1d;
DROP TABLE t1;
#
# Case 4: MDL-conflict, we execute ALTER until we hit gap in
# wsrep_abort_transaction, while we are there we try to
# manually KILL conflicting transaction (UPDATE) and
# send conflicting transaction from other node to be executed
# in this node by applier. As ALTER and KILL are TOI they
# are not executed concurrently. Similarly UPDATE from other
# node will wait for certification.
#
CREATE TABLE t1(id int not null primary key, b int) engine=innodb;
INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
begin;
update t1 set b = b * 10 where id between 2 and 4;
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
SET DEBUG_SYNC='wsrep_abort_victim_unlocked SIGNAL bf_kill_unlocked WAIT_FOR bf_continue';
ALTER TABLE t1 ADD UNIQUE KEY b1(b);;
connection node_1;
SET DEBUG_SYNC='now WAIT_FOR bf_kill_unlocked';
connection node_1b;
connection node_2;
update t1 set b = b + 1000 where id between 2 and 4;;
connection node_1;
SET DEBUG_SYNC='now SIGNAL bf_continue';
connection node_1c;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`id` int(11) NOT NULL,
`b` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `b1` (`b`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
SELECT * FROM t1;
id b
1 1
5 5
2 1002
3 1003
4 1004
connection node_1b;
connection node_1;
SET DEBUG_SYNC= 'RESET';
SELECT * FROM t1;
id b
1 1
5 5
2 1002
3 1003
4 1004
connection node_2;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`id` int(11) NOT NULL,
`b` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `b1` (`b`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
SELECT * FROM t1;
id b
1 1
5 5
2 1002
3 1003
4 1004
DROP TABLE t1;
disconnect node_1a;
disconnect node_1c;
16 changes: 0 additions & 16 deletions mysql-test/suite/galera/r/galera_toi_ddl_fk_insert.result
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,6 @@ connection node_1a;
connection node_1b;
connection node_2;
connection node_2a;
connection node_1;
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM parent;
COUNT(*)
20001
SELECT COUNT(*) FROM child;
COUNT(*)
10000
connection node_2;
SET SESSION wsrep_sync_wait=15;
SELECT COUNT(*) FROM parent;
COUNT(*)
20001
SELECT COUNT(*) FROM child;
COUNT(*)
10000
DROP TABLE child;
DROP TABLE parent;
DROP TABLE ten;
4 changes: 4 additions & 0 deletions mysql-test/suite/galera/t/galera_UK_conflict.test
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,13 @@ SELECT * FROM t1;
# original state in node 1
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
SELECT COUNT(*) FROM t1;
SELECT * FROM t1;

--connection node_1
--let $wait_condition = SELECT COUNT(*) = 7 FROM t1
--source include/wait_condition.inc
SELECT COUNT(*) FROM t1;
SELECT * FROM t1;

DROP TABLE t1;
9 changes: 9 additions & 0 deletions mysql-test/suite/galera/t/galera_bf_kill_debug.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
!include ../galera_2nodes.cnf

[mysqld.1]
wsrep_log_conflicts=ON
wsrep_debug=1

[mysqld.2]
wsrep_log_conflicts=ON
wsrep_debug=1
Loading

0 comments on commit 88a4be7

Please sign in to comment.