Skip to content

Commit 501c56e

Browse files
committed
MDEV-5262, MDEV-5914, MDEV-5941, MDEV-6020: Deadlocks during parallel replication causing replication to fail.
Merge the patches into MariaDB 10.0 main. With this patch, parallel replication automatically retries a transaction that fails due to a deadlock or other temporary error, the same as single-threaded replication does. We also detect deadlocks between InnoDB transactions that are caused by the enforced commit order: if T1 must commit before T2 in parallel replication and T1 ends up waiting for T2 inside InnoDB, we kill T2 and retry it later, resolving the deadlock automatically.
2 parents fd0abec + e81ecc9 commit 501c56e

33 files changed

+1908
-263
lines changed

mysql-test/r/innodb_mysql_sync.result

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,10 @@ SET DEBUG_SYNC= 'now SIGNAL killed';
8686
# Reaping: OPTIMIZE TABLE t1
8787
Table Op Msg_type Msg_text
8888
test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
89+
test.t1 optimize error Query execution was interrupted
8990
test.t1 optimize status Operation failed
91+
Warnings:
92+
Error 1317 Query execution was interrupted
9093
# Connection default
9194
DROP TABLE t1;
9295
SET DEBUG_SYNC= 'RESET';
504 KB
Binary file not shown.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
include/master-slave.inc
2+
[connection master]
3+
include/stop_slave.inc
4+
include/rpl_stop_server.inc [server_number=1]
5+
include/rpl_start_server.inc [server_number=1]
6+
SET SQL_LOG_BIN=0;
7+
ALTER TABLE mysql.gtid_slave_pos ENGINE = InnoDB;
8+
SET SQL_LOG_BIN=1;
9+
SET @old_engine= @@GLOBAL.default_storage_engine;
10+
SET GLOBAL default_storage_engine=InnoDB;
11+
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
12+
SET GLOBAL slave_parallel_threads=12;
13+
CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4;
14+
include/start_slave.inc
15+
SET SQL_LOG_BIN=0;
16+
ALTER TABLE mysql.gtid_slave_pos ENGINE = InnoDB;
17+
SET SQL_LOG_BIN=1;
18+
SELECT @@gtid_slave_pos;
19+
@@gtid_slave_pos
20+
0-1-1381
21+
CHECKSUM TABLE table0_int_autoinc, table0_key_pk_parts_2_int_autoinc, table100_int_autoinc, table100_key_pk_parts_2_int_autoinc, table10_int_autoinc, table10_key_pk_parts_2_int_autoinc, table1_int_autoinc, table1_key_pk_parts_2_int_autoinc, table2_int_autoinc, table2_key_pk_parts_2_int_autoinc;
22+
Table Checksum
23+
test.table0_int_autoinc 3623174395
24+
test.table0_key_pk_parts_2_int_autoinc 2888328157
25+
test.table100_int_autoinc 3624823809
26+
test.table100_key_pk_parts_2_int_autoinc 3316583308
27+
test.table10_int_autoinc 1615053718
28+
test.table10_key_pk_parts_2_int_autoinc 4147461080
29+
test.table1_int_autoinc 478809705
30+
test.table1_key_pk_parts_2_int_autoinc 3032208641
31+
test.table2_int_autoinc 854763867
32+
test.table2_key_pk_parts_2_int_autoinc 4231615291
33+
include/stop_slave.inc
34+
SET GLOBAL default_storage_engine= @old_engine;
35+
SET GLOBAL slave_parallel_threads=@old_parallel;
36+
SET sql_log_bin=0;
37+
DROP TABLE table0_int_autoinc;
38+
DROP TABLE table0_key_pk_parts_2_int_autoinc;
39+
DROP TABLE table100_int_autoinc;
40+
DROP TABLE table100_key_pk_parts_2_int_autoinc;
41+
DROP TABLE table10_int_autoinc;
42+
DROP TABLE table10_key_pk_parts_2_int_autoinc;
43+
DROP TABLE table1_int_autoinc;
44+
DROP TABLE table1_key_pk_parts_2_int_autoinc;
45+
DROP TABLE table2_int_autoinc;
46+
DROP TABLE table2_key_pk_parts_2_int_autoinc;
47+
SET sql_log_bin=1;
48+
include/start_slave.inc
49+
include/rpl_end.inc

mysql-test/suite/rpl/r/rpl_parallel.result

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ SET debug_sync='now WAIT_FOR t1_ready';
314314
KILL THD_ID;
315315
SET debug_sync='now WAIT_FOR t2_killed';
316316
SET debug_sync='now SIGNAL t1_cont';
317-
include/wait_for_slave_sql_error.inc [errno=1317,1964]
317+
include/wait_for_slave_sql_error.inc [errno=1317,1927,1964]
318318
STOP SLAVE IO_THREAD;
319319
SELECT * FROM t3 WHERE a >= 30 ORDER BY a;
320320
a b
@@ -398,7 +398,7 @@ SET debug_sync='now WAIT_FOR t1_ready';
398398
KILL THD_ID;
399399
SET debug_sync='now WAIT_FOR t2_killed';
400400
SET debug_sync='now SIGNAL t1_cont';
401-
include/wait_for_slave_sql_error.inc [errno=1317,1964]
401+
include/wait_for_slave_sql_error.inc [errno=1317,1927,1964]
402402
SET debug_sync='RESET';
403403
SET GLOBAL slave_parallel_threads=0;
404404
SET GLOBAL slave_parallel_threads=10;
@@ -481,7 +481,7 @@ SET debug_sync='now WAIT_FOR t1_ready';
481481
KILL THD_ID;
482482
SET debug_sync='now WAIT_FOR t2_killed';
483483
SET debug_sync='now SIGNAL t1_cont';
484-
include/wait_for_slave_sql_error.inc [errno=1317,1964]
484+
include/wait_for_slave_sql_error.inc [errno=1317,1927,1964]
485485
SELECT * FROM t3 WHERE a >= 50 ORDER BY a;
486486
a b
487487
51 51
@@ -819,11 +819,37 @@ test_check
819819
OK
820820
test_check
821821
OK
822+
*** MDEV_6435: Incorrect error handling when query binlogged partially on master with "killed" error ***
823+
CREATE TABLE t6 (a INT) ENGINE=MyISAM;
824+
CREATE TRIGGER tr AFTER INSERT ON t6 FOR EACH ROW SET @a = 1;
825+
SET @old_format= @@binlog_format;
826+
SET binlog_format= statement;
827+
SET debug_sync='sp_head_execute_before_loop SIGNAL ready WAIT_FOR cont';
828+
INSERT INTO t6 VALUES (1), (2), (3);
829+
SET debug_sync='now WAIT_FOR ready';
830+
KILL QUERY CONID;
831+
SET debug_sync='now SIGNAL cont';
832+
ERROR 70100: Query execution was interrupted
833+
SET binlog_format= @old_format;
834+
SET debug_sync='RESET';
835+
SET debug_sync='RESET';
836+
include/wait_for_slave_sql_error.inc [errno=1317]
837+
STOP SLAVE IO_THREAD;
838+
SET GLOBAL gtid_slave_pos= 'AFTER_ERROR_GTID_POS';
839+
include/start_slave.inc
840+
INSERT INTO t6 VALUES (4);
841+
SELECT * FROM t6 ORDER BY a;
842+
a
843+
1
844+
4
845+
SELECT * FROM t6 ORDER BY a;
846+
a
847+
4
822848
include/stop_slave.inc
823849
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
824850
include/start_slave.inc
825851
SET DEBUG_SYNC= 'RESET';
826852
DROP function foo;
827-
DROP TABLE t1,t2,t3,t4,t5;
853+
DROP TABLE t1,t2,t3,t4,t5,t6;
828854
SET DEBUG_SYNC= 'RESET';
829855
include/rpl_end.inc
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
include/rpl_init.inc [topology=1->2]
2+
*** Test retry of transactions that fail to replicate due to deadlock or similar temporary error. ***
3+
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
4+
CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
5+
INSERT INTO t1 VALUES (1,1);
6+
SET sql_log_bin=0;
7+
CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500))
8+
RETURNS INT DETERMINISTIC
9+
BEGIN
10+
RETURN x;
11+
END
12+
||
13+
SET sql_log_bin=1;
14+
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
15+
include/stop_slave.inc
16+
SET GLOBAL slave_parallel_threads=5;
17+
include/start_slave.inc
18+
SET sql_log_bin=0;
19+
CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500))
20+
RETURNS INT DETERMINISTIC
21+
BEGIN
22+
IF d1 != '' THEN
23+
SET debug_sync = d1;
24+
END IF;
25+
IF d2 != '' THEN
26+
SET debug_sync = d2;
27+
END IF;
28+
RETURN x;
29+
END
30+
||
31+
SET sql_log_bin=1;
32+
include/stop_slave.inc
33+
SET gtid_seq_no = 100;
34+
BEGIN;
35+
INSERT INTO t1 VALUES (2,1);
36+
UPDATE t1 SET b=b+1 WHERE a=1;
37+
INSERT INTO t1 VALUES (3,1);
38+
COMMIT;
39+
SELECT * FROM t1 ORDER BY a;
40+
a b
41+
1 2
42+
2 1
43+
3 1
44+
SET @old_dbug= @@GLOBAL.debug_dbug;
45+
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";
46+
include/start_slave.inc
47+
SET GLOBAL debug_dbug=@old_dbug;
48+
retries
49+
1
50+
SELECT * FROM t1 ORDER BY a;
51+
a b
52+
1 2
53+
2 1
54+
3 1
55+
*** Test that double retry works when the first retry also fails with temp error ***
56+
include/stop_slave.inc
57+
SET gtid_seq_no = 100;
58+
SET @old_server_id = @@server_id;
59+
SET server_id = 10;
60+
BEGIN;
61+
INSERT INTO t1 VALUES (4,1);
62+
UPDATE t1 SET b=b+1 WHERE a=1;
63+
INSERT INTO t1 VALUES (5,1);
64+
INSERT INTO t1 VALUES (6,1);
65+
COMMIT;
66+
SET server_id = @old_server_id;
67+
SELECT * FROM t1 ORDER BY a;
68+
a b
69+
1 3
70+
2 1
71+
3 1
72+
4 1
73+
5 1
74+
6 1
75+
SET @old_dbug= @@GLOBAL.debug_dbug;
76+
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_double_temp_err_gtid_0_x_100";
77+
include/start_slave.inc
78+
SET GLOBAL debug_dbug=@old_dbug;
79+
retries
80+
2
81+
SELECT * FROM t1 ORDER BY a;
82+
a b
83+
1 3
84+
2 1
85+
3 1
86+
4 1
87+
5 1
88+
6 1
89+
*** Test too many retries, eventually causing failure. ***
90+
include/stop_slave.inc
91+
SET gtid_seq_no = 100;
92+
SET @old_server_id = @@server_id;
93+
SET server_id = 11;
94+
BEGIN;
95+
INSERT INTO t1 VALUES (7,1);
96+
UPDATE t1 SET b=b+1 WHERE a=1;
97+
INSERT INTO t1 VALUES (8,1);
98+
INSERT INTO t1 VALUES (9,1);
99+
COMMIT;
100+
SET server_id = @old_server_id;
101+
SELECT * FROM t1 ORDER BY a;
102+
a b
103+
1 4
104+
2 1
105+
3 1
106+
4 1
107+
5 1
108+
6 1
109+
7 1
110+
8 1
111+
9 1
112+
SET sql_log_bin=0;
113+
CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up");
114+
CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction");
115+
SET sql_log_bin=1;
116+
SET @old_dbug= @@GLOBAL.debug_dbug;
117+
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100";
118+
START SLAVE;
119+
include/wait_for_slave_sql_error.inc [errno=1213]
120+
SET GLOBAL debug_dbug=@old_dbug;
121+
retries
122+
10
123+
SELECT * FROM t1 ORDER BY a;
124+
a b
125+
1 3
126+
2 1
127+
3 1
128+
4 1
129+
5 1
130+
6 1
131+
STOP SLAVE IO_THREAD;
132+
include/start_slave.inc
133+
SELECT * FROM t1 ORDER BY a;
134+
a b
135+
1 4
136+
2 1
137+
3 1
138+
4 1
139+
5 1
140+
6 1
141+
7 1
142+
8 1
143+
9 1
144+
*** Test retry of event group that spans multiple relay log files. ***
145+
CREATE TABLE t2 (a int PRIMARY KEY, b BLOB) ENGINE=InnoDB;
146+
INSERT INTO t2 VALUES (1,"Hulubullu");
147+
include/stop_slave.inc
148+
SET @old_max= @@GLOBAL.max_relay_log_size;
149+
SET GLOBAL max_relay_log_size=4096;
150+
SET gtid_seq_no = 100;
151+
SET @old_server_id = @@server_id;
152+
SET server_id = 12;
153+
BEGIN;
154+
INSERT INTO t1 VALUES (10, 4);
155+
COMMIT;
156+
SET server_id = @old_server_id;
157+
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
158+
a b
159+
10 4
160+
SELECT a, LENGTH(b) FROM t2 ORDER BY a;
161+
a LENGTH(b)
162+
1 9
163+
2 5006
164+
3 5012
165+
SET @old_dbug= @@GLOBAL.debug_dbug;
166+
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";
167+
include/start_slave.inc
168+
SET GLOBAL debug_dbug=@old_dbug;
169+
retries
170+
1
171+
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
172+
a b
173+
10 4
174+
SELECT a, LENGTH(b) FROM t2 ORDER BY a;
175+
a LENGTH(b)
176+
1 9
177+
2 5006
178+
3 5012
179+
INSERT INTO t1 VALUES (11,11);
180+
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
181+
a b
182+
10 4
183+
11 11
184+
SELECT a, LENGTH(b) FROM t2 ORDER BY a;
185+
a LENGTH(b)
186+
1 9
187+
2 5006
188+
3 5012
189+
4 5000
190+
SET GLOBAL max_relay_log_size=@old_max;
191+
include/stop_slave.inc
192+
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
193+
include/start_slave.inc
194+
DROP TABLE t1, t2;
195+
DROP function foo;
196+
include/rpl_end.inc
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
--source include/have_innodb.inc
2+
--source include/have_partition.inc
3+
--source include/have_binlog_format_mixed_or_row.inc
4+
--source include/master-slave.inc
5+
6+
--connection slave
7+
--source include/stop_slave.inc
8+
9+
--connection master
10+
--let $datadir= `SELECT @@datadir`
11+
12+
--let $rpl_server_number= 1
13+
--source include/rpl_stop_server.inc
14+
15+
--remove_file $datadir/master-bin.000001
16+
--remove_file $datadir/master-bin.state
17+
--copy_file $MYSQL_TEST_DIR/std_data/mdev6020-mysql-bin.000001 $datadir/master-bin.000001
18+
19+
--let $rpl_server_number= 1
20+
--source include/rpl_start_server.inc
21+
22+
--source include/wait_until_connected_again.inc
23+
24+
--connection slave
25+
SET SQL_LOG_BIN=0;
26+
ALTER TABLE mysql.gtid_slave_pos ENGINE = InnoDB;
27+
SET SQL_LOG_BIN=1;
28+
SET @old_engine= @@GLOBAL.default_storage_engine;
29+
SET GLOBAL default_storage_engine=InnoDB;
30+
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
31+
SET GLOBAL slave_parallel_threads=12;
32+
--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
33+
eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4;
34+
--source include/start_slave.inc
35+
36+
--connection master
37+
SET SQL_LOG_BIN=0;
38+
ALTER TABLE mysql.gtid_slave_pos ENGINE = InnoDB;
39+
SET SQL_LOG_BIN=1;
40+
--save_master_pos
41+
42+
--connection slave
43+
--sync_with_master
44+
45+
SELECT @@gtid_slave_pos;
46+
CHECKSUM TABLE table0_int_autoinc, table0_key_pk_parts_2_int_autoinc, table100_int_autoinc, table100_key_pk_parts_2_int_autoinc, table10_int_autoinc, table10_key_pk_parts_2_int_autoinc, table1_int_autoinc, table1_key_pk_parts_2_int_autoinc, table2_int_autoinc, table2_key_pk_parts_2_int_autoinc;
47+
48+
--source include/stop_slave.inc
49+
50+
51+
SET GLOBAL default_storage_engine= @old_engine;
52+
SET GLOBAL slave_parallel_threads=@old_parallel;
53+
SET sql_log_bin=0;
54+
DROP TABLE table0_int_autoinc;
55+
DROP TABLE table0_key_pk_parts_2_int_autoinc;
56+
DROP TABLE table100_int_autoinc;
57+
DROP TABLE table100_key_pk_parts_2_int_autoinc;
58+
DROP TABLE table10_int_autoinc;
59+
DROP TABLE table10_key_pk_parts_2_int_autoinc;
60+
DROP TABLE table1_int_autoinc;
61+
DROP TABLE table1_key_pk_parts_2_int_autoinc;
62+
DROP TABLE table2_int_autoinc;
63+
DROP TABLE table2_key_pk_parts_2_int_autoinc;
64+
SET sql_log_bin=1;
65+
66+
--source include/start_slave.inc
67+
68+
--connection master
69+
70+
--source include/rpl_end.inc

0 commit comments

Comments
 (0)