-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MDEV-25114 Crash: WSREP: invalid state ROLLED_BACK (FATAL)
This patch is the plan D variant for fixing potential mutex locking order issues exercised by BF aborting and KILL command execution. In this approach, the KILL command is replicated as a TOI operation. This guarantees total isolation for the KILL command execution in the first node: there is no concurrent replication applying and no concurrent DDL executing. Therefore there is no risk of BF aborting happening in parallel with KILL command execution either. Potential mutex deadlocks between the different mutex access paths with KILL command execution and BF aborting therefore cannot happen. TOI replication is used, in this approach, purely as a means to provide isolated KILL command execution in the first node. The KILL command should not (and must not) be applied in secondary nodes. In this patch, we ensure this by skipping KILL execution in secondary nodes, in the applying phase, where we bail out if an applier thread is trying to execute a KILL command. This is effective, but skipping the applying of the KILL command could happen much earlier as well. This patch also fixes the mutex locking order and unprotected THD member accesses in the BF aborting case. We try to hold THD::LOCK_thd_data during BF aborting. The only case where this is not possible is in wsrep_abort_transaction before the call to wsrep_innobase_kill_one_trx, where we take InnoDB mutexes first and then THD::LOCK_thd_data. This will also fix a possible race condition during close_connection and while wsrep is disconnecting connections. Added wsrep_bf_kill_debug test case. Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
- Loading branch information
Showing
18 changed files
with
725 additions
and
151 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
# | ||
# Case 1: We execute bf kill to wsrep_innobase_kill_one_trx | ||
# function just before wsrep_thd_LOCK(thd) call. Then we | ||
# try to kill victim transaction by KILL QUERY | ||
# | ||
CREATE TABLE t1(id int not null primary key, b int) engine=innodb; | ||
INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5); | ||
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
begin; | ||
update t1 set b = b * 10 where id between 2 and 4; | ||
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
SET DEBUG_SYNC='wsrep_before_BF_victim_lock SIGNAL bf_kill WAIT_FOR bf_continue'; | ||
ALTER TABLE t1 ADD UNIQUE KEY b1(b);; | ||
connection node_1; | ||
SET DEBUG_SYNC='now WAIT_FOR bf_kill'; | ||
connection node_1b; | ||
Table Create Table | ||
t1 CREATE TABLE `t1` ( | ||
`id` int(11) NOT NULL, | ||
`b` int(11) DEFAULT NULL, | ||
PRIMARY KEY (`id`), | ||
UNIQUE KEY `b1` (`b`) | ||
) ENGINE=InnoDB DEFAULT CHARSET=latin1 | ||
id b | ||
1 1 | ||
2 2 | ||
3 3 | ||
4 4 | ||
5 5 | ||
connection node_1; | ||
SET DEBUG_SYNC= 'RESET'; | ||
DROP TABLE t1; | ||
disconnect node_1a; | ||
disconnect node_1b; | ||
disconnect node_1c; | ||
# | ||
# Case 2: We execute bf kill to wsrep_innobase_kill_one_trx | ||
# function just after wsrep_thd_LOCK(thd) call. Then we | ||
# try to kill victim transaction by KILL QUERY | ||
# | ||
CREATE TABLE t1(id int not null primary key, b int) engine=innodb; | ||
INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5); | ||
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
begin; | ||
update t1 set b = b * 10 where id between 2 and 4; | ||
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
SET DEBUG_SYNC='wsrep_after_BF_victim_lock SIGNAL bf_kill WAIT_FOR bf_continue'; | ||
ALTER TABLE t1 ADD UNIQUE KEY b1(b);; | ||
connection node_1; | ||
SET DEBUG_SYNC='now WAIT_FOR bf_kill'; | ||
connection node_1b; | ||
Table Create Table | ||
t1 CREATE TABLE `t1` ( | ||
`id` int(11) NOT NULL, | ||
`b` int(11) DEFAULT NULL, | ||
PRIMARY KEY (`id`), | ||
UNIQUE KEY `b1` (`b`) | ||
) ENGINE=InnoDB DEFAULT CHARSET=latin1 | ||
id b | ||
1 1 | ||
2 2 | ||
3 3 | ||
4 4 | ||
5 5 | ||
connection node_1; | ||
SET DEBUG_SYNC= 'RESET'; | ||
DROP TABLE t1; | ||
disconnect node_1a; | ||
disconnect node_1b; | ||
disconnect node_1c; | ||
# | ||
# Case 3: Create victim transaction and try to send user KILL | ||
# from several threads | ||
# | ||
CREATE TABLE t1(id int not null primary key, b int) engine=innodb; | ||
INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5); | ||
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
begin; | ||
update t1 set b = b * 10 where id between 2 and 4; | ||
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
connect node_1d, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
connection node_1b; | ||
connection node_1c; | ||
connection node_1d; | ||
connection node_1; | ||
disconnect node_1a; | ||
disconnect node_1b; | ||
disconnect node_1c; | ||
disconnect node_1d; | ||
DROP TABLE t1; | ||
# | ||
# Case 4: MDL-conflict, we execute ALTER until we hit gap in | ||
# wsrep_abort_transaction, while we are there we try to | ||
# manually KILL conflicting transaction (UPDATE) and | ||
# send conflicting transaction from other node to be executed | ||
# in this node by applier. As ALTER and KILL are TOI they | ||
# are not executed concurrently. Similarly UPDATE from other | ||
# node will wait for certification. | ||
# | ||
CREATE TABLE t1(id int not null primary key, b int) engine=innodb; | ||
INSERT INTO t1 values (1,1),(2,2),(3,3),(4,4),(5,5); | ||
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
begin; | ||
update t1 set b = b * 10 where id between 2 and 4; | ||
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
SET DEBUG_SYNC='wsrep_abort_victim_unlocked SIGNAL bf_kill_unlocked WAIT_FOR bf_continue'; | ||
ALTER TABLE t1 ADD UNIQUE KEY b1(b);; | ||
connection node_1; | ||
SET DEBUG_SYNC='now WAIT_FOR bf_kill_unlocked'; | ||
connection node_1b; | ||
connection node_2; | ||
update t1 set b = b + 1000 where id between 2 and 4;; | ||
connection node_1; | ||
SET DEBUG_SYNC='now SIGNAL bf_continue'; | ||
connection node_1c; | ||
SHOW CREATE TABLE t1; | ||
Table Create Table | ||
t1 CREATE TABLE `t1` ( | ||
`id` int(11) NOT NULL, | ||
`b` int(11) DEFAULT NULL, | ||
PRIMARY KEY (`id`), | ||
UNIQUE KEY `b1` (`b`) | ||
) ENGINE=InnoDB DEFAULT CHARSET=latin1 | ||
SELECT * FROM t1; | ||
id b | ||
1 1 | ||
5 5 | ||
2 1002 | ||
3 1003 | ||
4 1004 | ||
connection node_1b; | ||
connection node_1; | ||
SET DEBUG_SYNC= 'RESET'; | ||
SELECT * FROM t1; | ||
id b | ||
1 1 | ||
5 5 | ||
2 1002 | ||
3 1003 | ||
4 1004 | ||
connection node_2; | ||
SHOW CREATE TABLE t1; | ||
Table Create Table | ||
t1 CREATE TABLE `t1` ( | ||
`id` int(11) NOT NULL, | ||
`b` int(11) DEFAULT NULL, | ||
PRIMARY KEY (`id`), | ||
UNIQUE KEY `b1` (`b`) | ||
) ENGINE=InnoDB DEFAULT CHARSET=latin1 | ||
SELECT * FROM t1; | ||
id b | ||
1 1 | ||
5 5 | ||
2 1002 | ||
3 1003 | ||
4 1004 | ||
DROP TABLE t1; | ||
disconnect node_1a; | ||
disconnect node_1c; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
!include ../galera_2nodes.cnf | ||
|
||
[mysqld.1] | ||
wsrep_log_conflicts=ON | ||
wsrep_debug=1 | ||
|
||
[mysqld.2] | ||
wsrep_log_conflicts=ON | ||
wsrep_debug=1 |
Oops, something went wrong.