Skip to content

Commit

Permalink
MDEV-31448: Killing a replica thread awaiting its GCO can hang/crash …
Browse files Browse the repository at this point in the history
…a parallel replica

Various test cases for the bugs around MDEV-31448.
Test cases due to Brandon Nesterenko, thanks!

Reviewed-by: Andrei Elkin <andrei.elkin@mariadb.com>
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
  • Loading branch information
knielsen committed Jul 12, 2023
1 parent 5d61442 commit d4309d4
Show file tree
Hide file tree
Showing 6 changed files with 464 additions and 0 deletions.
68 changes: 68 additions & 0 deletions mysql-test/suite/rpl/include/mdev-31448_conservative.inc
@@ -0,0 +1,68 @@
# MDEV-31448, conservative parallel mode.
#
# Scenario: with slave_parallel_mode=CONSERVATIVE and three worker threads,
# the replica applies T1 and T2 (binlogged under @commit_id 10000) and T3
# (binlogged under @commit_id 10001).  T1 is held up by an open transaction
# on connection slave1, T2 waits for T1 to commit, and T3 waits for the prior
# transaction to start committing.  The worker running T3 is then killed, the
# blocker on slave1 is released, and the test waits for the slave SQL thread
# to stop.  Finally the replica is repaired, restarted and synced.
#
# Requirements (not established by this file):
#   - connections master, slave and slave1 already exist
#     (presumably slave1 is a second connection to the replica - confirm
#     against the including test);
#   - a debug build, since debug_dbug is used.
#
# Every SQL statement below is echoed into the recorded .result file, so its
# text must not be altered without re-recording the result.

--connection master
create table t1 (a int) engine=innodb;
create table t2 (a int) engine=innodb;
insert into t1 values (1);
--source include/save_master_gtid.inc

--connection slave
call mtr.add_suppression("Slave: Commit failed due to failure of an earlier commit on which this one depends");

# Bring the replica up to date, then reconfigure it while it is stopped.
# The previous settings are remembered in user variables and put back below.
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
set @save.slave_parallel_threads= @@global.slave_parallel_threads;
set @save.slave_parallel_mode= @@global.slave_parallel_mode;
set @@global.slave_parallel_threads= 3;
set @@global.slave_parallel_mode= CONSERVATIVE;

# Leave a transaction open on the replica that has modified the row T1 is
# going to update.  It is committed only after the kill further down.
--connection slave1
BEGIN;
update t1 set a=2 where a=1;

--connection master
SET @old_dbug= @@SESSION.debug_dbug;
# Presumably this injection makes the master binlog each transaction with the
# commit id taken from @commit_id, which is how the GCO layout below is
# produced - confirm against the server source.
SET @@SESSION.debug_dbug="+d,binlog_force_commit_id";

# GCO 1
SET @commit_id= 10000;
# T1
update t1 set a=2 where a=1;
# T2
insert into t2 values (1);

# GCO 2
SET @commit_id= 10001;
# T3
insert into t1 values (3);

# All transactions of interest have been executed on the master, so turn the
# injection off again instead of leaking it into whatever the including test
# runs next on this connection.  The query log is disabled around the
# statement so that the recorded result stays unchanged.
--disable_query_log
SET @@SESSION.debug_dbug= @old_dbug;
--enable_query_log

--connection slave
--source include/start_slave.inc

# Wait until exactly one worker is in each of the three expected states.
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Update_rows_log_event::find_row(-1)' and command LIKE 'Slave_worker';
--source include/wait_condition.inc
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to commit%' and command LIKE 'Slave_worker';
--source include/wait_condition.inc
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to start commit%' and command LIKE 'Slave_worker';
--source include/wait_condition.inc

# Pick the thread id using the same filter as the wait above (state and
# command), so the id looked up is that of the worker that was waited for.
--let $t3_tid= `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE 'Waiting for prior transaction to start commit%' and command LIKE 'Slave_worker'`
# evalp keeps the literal text "$t3_tid" in the result file instead of the
# (non-deterministic) thread id.
--evalp kill $t3_tid

# Release the blocker.
--connection slave1
commit;

--connection slave
# NOTE(review): this sets the wait timeout to 1032; no other wait in this file
# overrides the timeout.  Confirm that a timeout (and not, e.g., an expected
# error number) was intended here.
--let $slave_timeout=1032
--source include/wait_for_slave_sql_to_stop.inc

# Revert the row change committed from slave1 (presumably so that T1's
# "where a=1" finds its row when replication is resumed), then restore the
# saved parallel replication settings before restarting the slave.
update t1 set a=1 where a=2;
set @@global.slave_parallel_threads = @save.slave_parallel_threads;
set @@global.slave_parallel_mode = @save.slave_parallel_mode;
--source include/start_slave.inc

--echo #
--echo # Cleanup
--connection master
DROP TABLE t1, t2;
--source include/save_master_gtid.inc

--connection slave
--source include/sync_with_master_gtid.inc
94 changes: 94 additions & 0 deletions mysql-test/suite/rpl/include/mdev-31448_optimistic.inc
@@ -0,0 +1,94 @@
--echo # MDEV-31448 OOO finish event group by killed worker
# The test demonstrates how a killed worker accesses the gco lists
# in finish_event_group() out of order, which in the buggy version fires
#   DBUG_ASSERT(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id);
#
# Parameter (set by the including test before sourcing this file):
#   $killed_trx_commits - when true, T2 is a plain insert binlogged under the
#                         same @commit_id (10000) as T1; when false, T2 is
#                         binlogged under @commit_id 10001 with
#                         skip_parallel_replication=1.
#
# Requirements (not established by this file):
#   - connections master, slave and slave1 already exist
#     (presumably slave1 is a second connection to the replica - confirm
#     against the including test);
#   - a debug build, since debug_dbug is used.
#
# Every SQL statement and --echo line below ends up in the recorded .result
# file, so their text must not be altered without re-recording the result.

--echo # Initialize test data
--connection master
create table t1 (a int) engine=innodb;
create table t2 (a int) engine=innodb;

insert into t1 values (1);
--source include/save_master_gtid.inc

--connection slave
call mtr.add_suppression("Connection was killed");
call mtr.add_suppression("Can.t find record");

# Bring the replica up to date, then reconfigure it while it is stopped.
# The previous settings are remembered in user variables and put back below.
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
set @save.slave_parallel_threads= @@global.slave_parallel_threads;
set @save.slave_parallel_mode= @@global.slave_parallel_mode;
set @@global.slave_parallel_threads= 3;
set @@global.slave_parallel_mode= OPTIMISTIC;

# Leave a transaction open on the replica that has modified the row T1 is
# going to update.  It is committed only after the kill further down.
--connection slave1
begin;
update t1 set a=2 where a=1;

--connection master
set @old_dbug= @@session.debug_dbug;
# Presumably this injection makes the master binlog each transaction with the
# commit id taken from @commit_id - confirm against the server source.
set @@session.debug_dbug="+d,binlog_force_commit_id";

# GCO 1
set @commit_id= 10000;
# T1
update t1 set a=2 where a=1;

if (!$killed_trx_commits)
{
set @commit_id= 10001;
# T2
set statement skip_parallel_replication=1 for insert into t2 values (1);
}

if ($killed_trx_commits)
{
# T2, binlogged under the same @commit_id as T1
insert into t2 values (1);
}
# GCO 2
# T3
drop table t2;

# All transactions of interest have been executed on the master, so turn the
# injection off again instead of leaking it into the next inclusion of this
# file or into the including test.  The query log is disabled around the
# statement so that the recorded result stays unchanged.
--disable_query_log
set @@session.debug_dbug= @old_dbug;
--enable_query_log

--connection slave
--source include/start_slave.inc

--echo # wait for T1
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Update_rows_log_event::find_row(-1)' and command LIKE 'Slave_worker';
--source include/wait_condition.inc

--echo # wait for T2
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to commit%' and command LIKE 'Slave_worker';
--source include/wait_condition.inc
# Remember T2's thread id now; it is killed once T3 is waiting as well.
--let $t2_tid= `SELECT ID FROM INFORMATION_SCHEMA.PROCESSLIST WHERE STATE LIKE 'Waiting for prior transaction to commit%' and command LIKE 'Slave_worker'`
--echo # wait for T3
--let $wait_condition= SELECT count(*)=1 FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to start commit%' and command LIKE 'Slave_worker';
--source include/wait_condition.inc

# evalp keeps the literal text "$t2_tid" in the result file instead of the
# (non-deterministic) thread id.
--evalp kill $t2_tid
# Do not release T1 until the replica reports error 1927 in Last_Errno
# (presumably the "Connection was killed" error suppressed above - confirm).
# This gives the killed T2 time to settle before the blocker goes away.
--let $slave_param=Last_Errno
--let $slave_param_value=1927
--source include/wait_for_slave_param.inc
# Release the blocker.
--connection slave1
commit;

--connection slave
# NOTE(review): this sets the wait timeout to 1032; no other wait in this file
# overrides the timeout.  Confirm that a timeout (and not, e.g., an expected
# error number) was intended here.
--let $slave_timeout=1032
--source include/wait_for_slave_sql_to_stop.inc

# Revert the row change committed from slave1 (presumably so that T1's
# "where a=1" finds its row when replication is resumed), then restore the
# saved parallel replication settings before restarting the slave.
update t1 set a=1 where a=2;
set @@global.slave_parallel_threads = @save.slave_parallel_threads;
set @@global.slave_parallel_mode = @save.slave_parallel_mode;
--source include/start_slave.inc

--echo #
--echo # Cleanup
--connection master
# Only t1 is left to drop: t2 was already dropped by T3 above.
drop table t1;
--source include/save_master_gtid.inc

--connection slave
--source include/sync_with_master_gtid.inc

@@ -0,0 +1,52 @@
include/master-slave.inc
[connection master]
# MDEV-31448 OOO finish event group by killed worker
# Initialize test data
connection master;
call mtr.add_suppression("Slave: Connection was killed");
call mtr.add_suppression("Slave: Commit failed due to failure of an earlier commit on which this one depends");
create table t1 (a int) engine=innodb;
create table t2 (a int) engine=innodb;
insert into t1 values (1);
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/stop_slave.inc
set @@global.slave_parallel_threads= 4;
set @@global.slave_parallel_mode= OPTIMISTIC;
set @@global.innodb_lock_wait_timeout= 30;
set @@global.slave_transaction_retries= 0;
connection slave1;
BEGIN;
SELECT * FROM t1 WHERE a=1 FOR UPDATE;
a
1
connection master;
SET @old_dbug= @@SESSION.debug_dbug;
SET @@SESSION.debug_dbug="+d,binlog_force_commit_id";
SET @commit_id= 10000;
update t1 set a=2 where a=1;
set statement skip_parallel_replication=1 for insert into t2 values (1);
drop table t2;
connection slave;
include/start_slave.inc
# wait for T1
# wait for T2
# wait for T3
kill T2_TID;
connection slave1;
ROLLBACK;
connection master;
DROP TABLE t1;
include/save_master_gtid.inc
connection slave;
#
# Cleanup
include/stop_slave.inc
set @@global.slave_parallel_threads= 0;
set @@global.slave_parallel_mode= conservative;
set @@global.innodb_lock_wait_timeout= 50;
set @@global.slave_transaction_retries= 10;
include/start_slave.inc
include/sync_with_master_gtid.inc
include/rpl_end.inc
142 changes: 142 additions & 0 deletions mysql-test/suite/rpl/r/rpl_parallel_kill.result
@@ -0,0 +1,142 @@
include/master-slave.inc
[connection master]
connection master;
create table t1 (a int) engine=innodb;
create table t2 (a int) engine=innodb;
insert into t1 values (1);
include/save_master_gtid.inc
connection slave;
call mtr.add_suppression("Slave: Commit failed due to failure of an earlier commit on which this one depends");
include/sync_with_master_gtid.inc
include/stop_slave.inc
set @save.slave_parallel_threads= @@global.slave_parallel_threads;
set @save.slave_parallel_mode= @@global.slave_parallel_mode;
set @@global.slave_parallel_threads= 3;
set @@global.slave_parallel_mode= CONSERVATIVE;
connection slave1;
BEGIN;
update t1 set a=2 where a=1;
connection master;
SET @old_dbug= @@SESSION.debug_dbug;
SET @@SESSION.debug_dbug="+d,binlog_force_commit_id";
SET @commit_id= 10000;
update t1 set a=2 where a=1;
insert into t2 values (1);
SET @commit_id= 10001;
insert into t1 values (3);
connection slave;
include/start_slave.inc
kill $t3_tid;
connection slave1;
commit;
connection slave;
include/wait_for_slave_sql_to_stop.inc
update t1 set a=1 where a=2;
set @@global.slave_parallel_threads = @save.slave_parallel_threads;
set @@global.slave_parallel_mode = @save.slave_parallel_mode;
include/start_slave.inc
#
# Cleanup
connection master;
DROP TABLE t1, t2;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
# MDEV-31448 OOO finish event group by killed worker
# Initialize test data
connection master;
create table t1 (a int) engine=innodb;
create table t2 (a int) engine=innodb;
insert into t1 values (1);
include/save_master_gtid.inc
connection slave;
call mtr.add_suppression("Connection was killed");
call mtr.add_suppression("Can.t find record");
include/sync_with_master_gtid.inc
include/stop_slave.inc
set @save.slave_parallel_threads= @@global.slave_parallel_threads;
set @save.slave_parallel_mode= @@global.slave_parallel_mode;
set @@global.slave_parallel_threads= 3;
set @@global.slave_parallel_mode= OPTIMISTIC;
connection slave1;
begin;
update t1 set a=2 where a=1;
connection master;
set @old_dbug= @@session.debug_dbug;
set @@session.debug_dbug="+d,binlog_force_commit_id";
set @commit_id= 10000;
update t1 set a=2 where a=1;
insert into t2 values (1);
drop table t2;
connection slave;
include/start_slave.inc
# wait for T1
# wait for T2
# wait for T3
kill $t2_tid;
include/wait_for_slave_param.inc [Last_Errno]
connection slave1;
commit;
connection slave;
include/wait_for_slave_sql_to_stop.inc
update t1 set a=1 where a=2;
set @@global.slave_parallel_threads = @save.slave_parallel_threads;
set @@global.slave_parallel_mode = @save.slave_parallel_mode;
include/start_slave.inc
#
# Cleanup
connection master;
drop table t1;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
# MDEV-31448 OOO finish event group by killed worker
# Initialize test data
connection master;
create table t1 (a int) engine=innodb;
create table t2 (a int) engine=innodb;
insert into t1 values (1);
include/save_master_gtid.inc
connection slave;
call mtr.add_suppression("Connection was killed");
call mtr.add_suppression("Can.t find record");
include/sync_with_master_gtid.inc
include/stop_slave.inc
set @save.slave_parallel_threads= @@global.slave_parallel_threads;
set @save.slave_parallel_mode= @@global.slave_parallel_mode;
set @@global.slave_parallel_threads= 3;
set @@global.slave_parallel_mode= OPTIMISTIC;
connection slave1;
begin;
update t1 set a=2 where a=1;
connection master;
set @old_dbug= @@session.debug_dbug;
set @@session.debug_dbug="+d,binlog_force_commit_id";
set @commit_id= 10000;
update t1 set a=2 where a=1;
set @commit_id= 10001;
set statement skip_parallel_replication=1 for insert into t2 values (1);
drop table t2;
connection slave;
include/start_slave.inc
# wait for T1
# wait for T2
# wait for T3
kill $t2_tid;
include/wait_for_slave_param.inc [Last_Errno]
connection slave1;
commit;
connection slave;
include/wait_for_slave_sql_to_stop.inc
update t1 set a=1 where a=2;
set @@global.slave_parallel_threads = @save.slave_parallel_threads;
set @@global.slave_parallel_mode = @save.slave_parallel_mode;
include/start_slave.inc
#
# Cleanup
connection master;
drop table t1;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/rpl_end.inc

0 comments on commit d4309d4

Please sign in to comment.