From 8d238d47268bcd7470abed147d79eb9546b28ac4 Mon Sep 17 00:00:00 2001 From: Andrei Date: Thu, 30 Jun 2022 15:46:19 +0300 Subject: [PATCH] MDEV-28609 refine gtid-strict-mode to ignore same server-id gtid from the past ... on semisync slave To provide semisync master crash-recovery, same server-id transactions were made acceptable for execution on the semisync slave in strict gtid mode (see MDEV-27760). That, however, caused an out-of-order error on the server that originated a transaction in a circular setup. The error was fair in the sense of the gtid strict mode rule, as indeed in a circular setup the replicated transaction already exists in the local binlog. This commit fixes it by ignoring, on the gtid strict mode semisync slave, those gtids that already exist in the slave's binlog, which effectively restores the default same-server-id ignore policy. At the same time the fix complies with MDEV-21117 semisync slave recovery by still accepting the same server-id transactions that do not exist in the local binlog. --- mysql-test/main/mysqld--help.result | 4 +- .../suite/rpl/r/rpl_circular_semi_sync.result | 133 +++++++++++--- .../rpl/r/rpl_semi_sync_fail_over.result | 30 +++- .../suite/rpl/t/rpl_circular_semi_sync.test | 163 +++++++++++++----- .../suite/rpl/t/rpl_semi_sync_fail_over.test | 21 ++- .../r/sysvars_server_notembedded.result | 2 +- sql/log.cc | 6 +- sql/log.h | 2 +- sql/rpl_gtid.cc | 9 +- sql/rpl_gtid.h | 3 +- sql/rpl_mi.cc | 3 +- sql/rpl_mi.h | 14 ++ sql/slave.cc | 31 ++-- sql/sys_vars.cc | 5 +- 14 files changed, 319 insertions(+), 107 deletions(-) diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 2a1581515282c..bed2f6c86be07 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -360,7 +360,9 @@ The following specify which files/extra groups are read (specified before remain --gtid-strict-mode Enforce strict seq_no ordering of events in the binary log. 
Slave stops with an error if it encounters an event that would cause it to generate an out-of-order binlog if - executed. + executed. When ON the same server-id semisync-replicated + transactions that duplicate exising ones in binlog are + ignored without error and slave interruption. -?, --help Display this help and exit. --histogram-size=# Number of bytes used for a histogram. If set to 0, no histograms are created by ANALYZE. diff --git a/mysql-test/suite/rpl/r/rpl_circular_semi_sync.result b/mysql-test/suite/rpl/r/rpl_circular_semi_sync.result index dcced9833ca25..5664b7913d22d 100644 --- a/mysql-test/suite/rpl/r/rpl_circular_semi_sync.result +++ b/mysql-test/suite/rpl/r/rpl_circular_semi_sync.result @@ -1,12 +1,9 @@ include/master-slave.inc [connection master] -# Master server_1 and Slave server_2 initialiation ... +# Master server_1 and Slave server_2 initialization ... connection server_2; include/stop_slave.inc connection server_1; -set @@sql_log_bin = off; -call mtr.add_suppression("Slave: An attempt was made to binlog GTID 10-1-1 which would create an out-of-order sequence number with existing GTID"); -set @@sql_log_bin = on; RESET MASTER; set @@session.gtid_domain_id=10; set @@global.rpl_semi_sync_master_enabled = 1; @@ -22,31 +19,48 @@ Warnings: Warning 1948 Specified value for @@gtid_slave_pos contains no value for replication domain 0. This conflicts with the binary log which contains GTID 0-2-1. If MASTER_GTID_POS=CURRENT_POS is used, the binlog position will override the new value of @@gtid_slave_pos CHANGE MASTER TO master_use_gtid= slave_pos; include/start_slave.inc -# ... server_1 -> server_2 is set up +# server_1 -> server_2 semisync link is set up. 
connection server_1; -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=Innodb; -INSERT INTO t1 VALUES (1); +CREATE TABLE t1 (a INT PRIMARY KEY, b INT default 0) ENGINE=Innodb; +INSERT INTO t1(a) VALUES (1); +include/save_master_gtid.inc connection server_2; -# Circular configuration server_2 -> server_1 initialiation ... +include/sync_with_master_gtid.inc +# Circular configuration server_1 -> server_2 -> server_1 ... connection server_1; -# A. ... first when server_1 is in gtid strict mode... set @@global.gtid_strict_mode = true; set @@global.rpl_semi_sync_slave_enabled = 1; CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=SLAVE_POS; -# ... only for it to fail 'cos if its inconsistent (empty) slave's gtid state: -SELECT @@global.gtid_slave_pos; -@@global.gtid_slave_pos - -START SLAVE; -include/wait_for_slave_sql_error.inc [errno=1950] -# B. ... Resume on the circular setup with the server_id now in the non-strict mode ... -set @@global.gtid_strict_mode = false; include/start_slave.inc -# ... to have succeeded. +... is done. +## A. no out-of-order gtid error for own transaction made round trip +connection server_2; +set @@global.gtid_strict_mode = true; +set @@global.rpl_semi_sync_master_enabled = 1; +INSERT INTO t1(a) VALUES (2); +include/save_master_gtid.inc +connection server_1; +# +# the successful sync is a required proof +# +include/sync_with_master_gtid.inc +update t1 set b=b+1 where a=2; +include/save_master_gtid.inc connection server_2; -INSERT INTO t1 VALUES (2); +include/sync_with_master_gtid.inc +# Post-execution state check on both servers synchronized with each other connection server_1; -INSERT INTO t1 VALUES (3); +# ... 
the gtid states on server_1 +SHOW VARIABLES LIKE 'gtid_slave_pos'; +Variable_name Value +gtid_slave_pos 0-2-1,10-1-3,20-2-1 +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-2-1,10-1-3,20-2-1 +SELECT * from t1; +a b +1 0 +2 1 connection server_2; # The gtid states on server_2 must be equal to ... SHOW VARIABLES LIKE 'gtid_binlog_pos'; @@ -55,22 +69,95 @@ gtid_binlog_pos 0-2-1,10-1-3,20-2-1 SHOW VARIABLES LIKE 'gtid_slave_pos'; Variable_name Value gtid_slave_pos 0-2-1,10-1-3,20-2-1 +SELECT * from t1; +a b +1 0 +2 1 +## B. out-of-order gtid error for a "foreign" server-id transaction +connection server_1; +set statement sql_log_bin=0 for call mtr.add_suppression("Slave: An attempt was made to binlog GTID 10-2-4"); +set @@session.server_id=2; +INSERT INTO t1(a) VALUES (3); +set @@session.server_id=default; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc +INSERT INTO t1(a) VALUES (4); +include/save_master_gtid.inc +connection server_1; +include/wait_for_slave_sql_error.inc [errno=1950] +set sql_slave_skip_counter=1; +include/start_slave.inc +include/sync_with_master_gtid.inc +connection server_2; +set statement sql_log_bin=0 for call mtr.add_suppression("Slave: An attempt was made to binlog GTID 20-1-3"); +set @@session.server_id=1; +INSERT INTO t1(a) VALUES (5); +set @@session.server_id=default; +include/save_master_gtid.inc +connection server_1; +include/sync_with_master_gtid.inc +INSERT INTO t1(a) VALUES (6); +include/save_master_gtid.inc +connection server_2; +include/wait_for_slave_sql_error.inc [errno=1950] +set sql_slave_skip_counter=1; +include/start_slave.inc +include/sync_with_master_gtid.inc +# Post-execution state check on both servers synchronized with each other connection server_1; # ... 
the gtid states on server_1 SHOW VARIABLES LIKE 'gtid_slave_pos'; Variable_name Value -gtid_slave_pos 0-2-1,10-1-3,20-2-1 +gtid_slave_pos 0-2-1,10-1-5,20-1-3 SHOW VARIABLES LIKE 'gtid_binlog_pos'; Variable_name Value -gtid_binlog_pos 0-2-1,10-1-3,20-2-1 +gtid_binlog_pos 0-2-1,10-1-5,20-1-3 +SELECT * from t1; +a b +1 0 +2 1 +3 0 +4 0 +5 0 +6 0 +connection server_2; +include/sync_with_master_gtid.inc +# The gtid states on server_2 must be equal to ... +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +Variable_name Value +gtid_binlog_pos 0-2-1,10-1-5,20-1-3 +SHOW VARIABLES LIKE 'gtid_slave_pos'; +Variable_name Value +gtid_slave_pos 0-2-1,10-1-5,20-1-3 +SELECT * from t1; +a b +1 0 +2 1 +3 0 +4 0 +5 0 +6 0 +# # Cleanup +# +connection server_1; +DROP TABLE t1; +include/save_master_gtid.inc +connection server_2; +include/sync_with_master_gtid.inc connection server_1; include/stop_slave.inc set @@global.rpl_semi_sync_master_enabled = default; set @@global.rpl_semi_sync_slave_enabled = default; set @@global.rpl_semi_sync_master_wait_point=default; -DROP TABLE t1; +set @@global.gtid_ignore_duplicates = default; +set @@global.gtid_strict_mode = default; connection server_2; +include/stop_slave.inc +set @@global.gtid_ignore_duplicates = default; set @@global.rpl_semi_sync_master_enabled = default; set @@global.rpl_semi_sync_slave_enabled = default; +set @@global.gtid_strict_mode = default; +include/start_slave.inc include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result index 1c15b10da96ca..8956eee2d2f5e 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result @@ -70,6 +70,9 @@ INSERT INTO t1 VALUES (3, 'dummy3'); SHOW VARIABLES LIKE 'gtid_binlog_pos'; Variable_name Value gtid_binlog_pos 0-2-5 +SHOW VARIABLES LIKE 'gtid_binlog_state'; +Variable_name Value +gtid_binlog_state 0-1-4,0-2-5 SHOW VARIABLES LIKE 'gtid_slave_pos'; 
Variable_name Value gtid_slave_pos 0-1-4 @@ -84,12 +87,15 @@ gtid_slave_pos 0-2-5 SHOW VARIABLES LIKE 'gtid_binlog_pos'; Variable_name Value gtid_binlog_pos 0-2-5 +SHOW VARIABLES LIKE 'gtid_binlog_state'; +Variable_name Value +gtid_binlog_state 0-1-4,0-2-5 connection server_2; # # Case:2 # # CRASH the new master, and FAILOVER back to the original -# INSERT INTO t1 VALUES (4, REPEAT("x", 4100)) +# SET STATEMENT server_id=1 FOR INSERT INTO t1 VALUES (4, REPEAT("x", 4100)) # INSERT INTO t1 VALUES (5, REPEAT("x", 4100)) # Rows 4 and 5 will be in master's binlog but not committed, they get # replicated to slave and applied. On crash master should have 3 rows @@ -98,14 +104,14 @@ connection server_2; # Expected State post crash: #================================================================= # Master | Slave | -# 0-2-6 (Not commited) | 0-2-6 (Received through semi-sync | +# 0-1-6 (Not commited) | 0-1-6 (Received through semi-sync | # | replication and applied) | # 0-2-7 (Not commited) | 0-2-7 (Received through semi-sync | # | replication and applied) | #================================================================= connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,; SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go"; -INSERT INTO t1 VALUES (4, REPEAT("x", 4100)); +SET STATEMENT server_id=1 FOR INSERT INTO t1 VALUES (4, REPEAT("x", 4100)); connect conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,; SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; SET GLOBAL debug_dbug="d,Notify_binlog_EOF"; @@ -123,7 +129,7 @@ SELECT @@GLOBAL.gtid_current_pos; # restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 connection server_2; include/assert.inc [Table t1 should have 3 rows.] 
-FOUND 1 /truncated binlog file:.*slave.*000002/ in mysqld.2.err +FOUND 1 /truncated binlog file:.*slave.*000002.* to remove transactions starting from GTID 0-1-6/ in mysqld.2.err disconnect conn_client; connection server_1; set global rpl_semi_sync_master_enabled = 1; @@ -134,7 +140,7 @@ set global rpl_semi_sync_slave_enabled = 1; set @@global.gtid_slave_pos=@@global.gtid_binlog_pos; include/start_slave.inc # -# Server_1 promoted as master will send 0-2-6 and 0-2-7 to slave Server_2 +# Server_1 promoted as master will send 0-1-6 and 0-2-7 to slave Server_2 # connection server_1; INSERT INTO t1 VALUES (6, 'dummy6'); @@ -142,6 +148,9 @@ INSERT INTO t1 VALUES (6, 'dummy6'); SHOW VARIABLES LIKE 'gtid_binlog_pos'; Variable_name Value gtid_binlog_pos 0-1-8 +SHOW VARIABLES LIKE 'gtid_binlog_state'; +Variable_name Value +gtid_binlog_state 0-2-7,0-1-8 SHOW VARIABLES LIKE 'gtid_slave_pos'; Variable_name Value gtid_slave_pos 0-2-7 @@ -156,6 +165,9 @@ gtid_slave_pos 0-1-8 SHOW VARIABLES LIKE 'gtid_binlog_pos'; Variable_name Value gtid_binlog_pos 0-1-8 +SHOW VARIABLES LIKE 'gtid_binlog_state'; +Variable_name Value +gtid_binlog_state 0-2-7,0-1-8 include/diff_tables.inc [server_1:t1, server_2:t1] connection server_1; # @@ -196,7 +208,7 @@ SELECT @@GLOBAL.gtid_current_pos; # restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1 connection server_1; include/assert.inc [Table t1 should have 6 rows.] 
-NOT FOUND /truncated binlog file:.*master.*000003/ in mysqld.1.err +FOUND 1 /truncated binlog file:.*master.*000002.* to remove transactions starting from GTID 0-1-9/ in mysqld.1.err disconnect conn_client; connection server_2; set global rpl_semi_sync_master_enabled = 1; @@ -216,6 +228,9 @@ include/save_master_gtid.inc SHOW VARIABLES LIKE 'gtid_binlog_pos'; Variable_name Value gtid_binlog_pos 0-2-10 +SHOW VARIABLES LIKE 'gtid_binlog_state'; +Variable_name Value +gtid_binlog_state 0-1-9,0-2-10 SHOW VARIABLES LIKE 'gtid_slave_pos'; Variable_name Value gtid_slave_pos 0-1-9 @@ -231,6 +246,9 @@ gtid_slave_pos 0-2-10 SHOW VARIABLES LIKE 'gtid_binlog_pos'; Variable_name Value gtid_binlog_pos 0-2-10 +SHOW VARIABLES LIKE 'gtid_binlog_state'; +Variable_name Value +gtid_binlog_state 0-1-9,0-2-10 # # Cleanup # diff --git a/mysql-test/suite/rpl/t/rpl_circular_semi_sync.test b/mysql-test/suite/rpl/t/rpl_circular_semi_sync.test index 51fa5a242ea5e..267fa62194590 100644 --- a/mysql-test/suite/rpl/t/rpl_circular_semi_sync.test +++ b/mysql-test/suite/rpl/t/rpl_circular_semi_sync.test @@ -1,115 +1,188 @@ # ==== References ==== # # MDEV-27760 event may non stop replicate in circular semisync setup -# +# MDEV-28609 refine gtid-strict-mode to ignore same server-id gtid from the past +# on semisync slave --source include/have_innodb.inc ---source include/have_binlog_format_row.inc +--source include/have_binlog_format_mixed.inc --source include/master-slave.inc # The following tests prove -# A. out-of-order gtid error when the stict gtid mode semisync slave -# receives the same server-id gtid event inconsistent -# (rpl_semi_sync_fail_over tests the consistent case) with its state; -# B. in the non-strict mode the same server-id events remains ignored -# by default as usual. -# ---echo # Master server_1 and Slave server_2 initialiation ... +# A. 
+# no out-of-order gtid error is raised when the strict gtid mode semisync +# slave receives the same server-id gtid event from the past (of its gtid +# state). Such a transaction is silently ignored similarly to +# replicate_same_server_id; and +# B. +# In contrast to A. the out-of-order gtid error is thrown when a "foreign" +# server-id transaction makes its round-trip to the originator server. + +--echo # Master server_1 and Slave server_2 initialization ... --connection server_2 --source include/stop_slave.inc # Initial master --connection server_1 -set @@sql_log_bin = off; -call mtr.add_suppression("Slave: An attempt was made to binlog GTID 10-1-1 which would create an out-of-order sequence number with existing GTID"); -set @@sql_log_bin = on; - RESET MASTER; - set @@session.gtid_domain_id=10; - set @@global.rpl_semi_sync_master_enabled = 1; set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC; --connection server_2 RESET MASTER; ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; - set @@session.gtid_domain_id=20; - set @@global.rpl_semi_sync_slave_enabled = 1; --echo # a 1948 warning is expected set @@global.gtid_slave_pos = ""; CHANGE MASTER TO master_use_gtid= slave_pos; --source include/start_slave.inc ---echo # ... server_1 -> server_2 is set up +--echo # server_1 -> server_2 semisync link is set up. --connection server_1 -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=Innodb; -INSERT INTO t1 VALUES (1); ---save_master_pos +CREATE TABLE t1 (a INT PRIMARY KEY, b INT default 0) ENGINE=Innodb; +INSERT INTO t1(a) VALUES (1); +--source include/save_master_gtid.inc --connection server_2 ---sync_with_master +--source include/sync_with_master_gtid.inc ---echo # Circular configuration server_2 -> server_1 initialiation ... +--echo # Circular configuration server_1 -> server_2 -> server_1 ... --connection server_1 ---echo # A. ... first when server_1 is in gtid strict mode... 
set @@global.gtid_strict_mode = true; set @@global.rpl_semi_sync_slave_enabled = 1; evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=SLAVE_POS; - ---echo # ... only for it to fail 'cos if its inconsistent (empty) slave's gtid state: -SELECT @@global.gtid_slave_pos; -START SLAVE; -# ER_GTID_STRICT_OUT_OF_ORDER ---let $slave_sql_errno = 1950 ---source include/wait_for_slave_sql_error.inc - ---echo # B. ... Resume on the circular setup with the server_id now in the non-strict mode ... -set @@global.gtid_strict_mode = false; --source include/start_slave.inc +--echo ... is done. ---echo # ... to have succeeded. +--echo ## A. no out-of-order gtid error for own transaction made round trip +# A0. server_1 has already originated the transaction +--let $wait_condition=select @@gtid_slave_pos=@@gtid_binlog_pos +--source include/wait_condition.inc + +# A1. server_2 originates --connection server_2 -INSERT INTO t1 VALUES (2); ---save_master_pos +set @@global.gtid_strict_mode = true; +set @@global.rpl_semi_sync_master_enabled = 1; +INSERT INTO t1(a) VALUES (2); +--source include/save_master_gtid.inc --connection server_1 ---sync_with_master +--echo # +--echo # the successful sync is a required proof +--echo # +--source include/sync_with_master_gtid.inc +# A2. server_1 is originating now +update t1 set b=b+1 where a=2; +--source include/save_master_gtid.inc -INSERT INTO t1 VALUES (3); ---save_master_pos +--connection server_2 +--source include/sync_with_master_gtid.inc + +--echo # Post-execution state check on both servers synchronized with each other +--connection server_1 +--echo # ... the gtid states on server_1 +--let $wait_condition=select @@gtid_slave_pos=@@gtid_binlog_pos +--source include/wait_condition.inc +SHOW VARIABLES LIKE 'gtid_slave_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +SELECT * from t1; --connection server_2 ---sync_with_master --echo # The gtid states on server_2 must be equal to ... 
--let $wait_condition=select @@gtid_slave_pos=@@gtid_binlog_pos --source include/wait_condition.inc SHOW VARIABLES LIKE 'gtid_binlog_pos'; SHOW VARIABLES LIKE 'gtid_slave_pos'; +SELECT * from t1; + +--echo ## B. out-of-order gtid error for a "foreign" server-id transaction +# B1. circulation starts from server_1 + +--connection server_1 +set statement sql_log_bin=0 for call mtr.add_suppression("Slave: An attempt was made to binlog GTID 10-2-4"); +set @@session.server_id=2; +INSERT INTO t1(a) VALUES (3); +set @@session.server_id=default; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +INSERT INTO t1(a) VALUES (4); +--source include/save_master_gtid.inc + +--connection server_1 +--let $slave_sql_errno = 1950 +--source include/wait_for_slave_sql_error.inc +set sql_slave_skip_counter=1; +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + +# B2. circulation starts from server_2 +--connection server_2 +set statement sql_log_bin=0 for call mtr.add_suppression("Slave: An attempt was made to binlog GTID 20-1-3"); +set @@session.server_id=1; +INSERT INTO t1(a) VALUES (5); +set @@session.server_id=default; +--source include/save_master_gtid.inc +--connection server_1 +--source include/sync_with_master_gtid.inc +INSERT INTO t1(a) VALUES (6); +--source include/save_master_gtid.inc + + +--connection server_2 +--let $slave_sql_errno = 1950 +--source include/wait_for_slave_sql_error.inc +set sql_slave_skip_counter=1; +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc + +--echo # Post-execution state check on both servers synchronized with each other --connection server_1 --echo # ... 
the gtid states on server_1 --let $wait_condition=select @@gtid_slave_pos=@@gtid_binlog_pos --source include/wait_condition.inc SHOW VARIABLES LIKE 'gtid_slave_pos'; SHOW VARIABLES LIKE 'gtid_binlog_pos'; +SELECT * from t1; + +--connection server_2 +--source include/sync_with_master_gtid.inc +--echo # The gtid states on server_2 must be equal to ... +--let $wait_condition=select @@gtid_slave_pos=@@gtid_binlog_pos +--source include/wait_condition.inc +SHOW VARIABLES LIKE 'gtid_binlog_pos'; +SHOW VARIABLES LIKE 'gtid_slave_pos'; +SELECT * from t1; +--echo # --echo # Cleanup +--echo # +--connection server_1 +DROP TABLE t1; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc + --connection server_1 --source include/stop_slave.inc set @@global.rpl_semi_sync_master_enabled = default; set @@global.rpl_semi_sync_slave_enabled = default; set @@global.rpl_semi_sync_master_wait_point=default; - -DROP TABLE t1; ---save_master_pos +set @@global.gtid_ignore_duplicates = default; +set @@global.gtid_strict_mode = default; --connection server_2 ---sync_with_master +--source include/stop_slave.inc +set @@global.gtid_ignore_duplicates = default; set @@global.rpl_semi_sync_master_enabled = default; set @@global.rpl_semi_sync_slave_enabled = default; +set @@global.gtid_strict_mode = default; +--source include/start_slave.inc --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test index 0505e88d758bf..6a691ae04f6e2 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test @@ -1,6 +1,7 @@ # ==== References ==== # -# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server +# MDEV-21117 recovery for --rpl-semi-sync-slave-enabled server +# MDEV-27760 event may non stop replicate in circular semisync setup # --source include/have_innodb.inc @@ -72,6 +73,7 @@ INSERT INTO t1 
VALUES (1, 'dummy1'); --save_master_pos --echo # The gtid state on current master must be equal to ... SHOW VARIABLES LIKE 'gtid_binlog_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_state'; SHOW VARIABLES LIKE 'gtid_slave_pos'; --connection server_1 @@ -80,6 +82,7 @@ SHOW VARIABLES LIKE 'gtid_slave_pos'; --echo # ... the gtid states on the slave: SHOW VARIABLES LIKE 'gtid_slave_pos'; SHOW VARIABLES LIKE 'gtid_binlog_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_state'; --connection server_2 --let $case = 2 @@ -89,7 +92,9 @@ SHOW VARIABLES LIKE 'gtid_binlog_pos'; --echo # CRASH the new master, and FAILOVER back to the original # value 0 for the reverse server id 2 -> 1 failover --let $failover_to_slave=0 ---let $query_to_crash = INSERT INTO t1 VALUES (4, REPEAT("x", 4100)) +# Additionally through "foreign" server_id verify MDEV-27760's acceptance +# policy on the recipient (to be promoted into master) server. +--let $query_to_crash = SET STATEMENT server_id=1 FOR INSERT INTO t1 VALUES (4, REPEAT("x", 4100)) --let $query2_to_crash= INSERT INTO t1 VALUES (5, REPEAT("x", 4100)) --echo # $query_to_crash --echo # $query2_to_crash @@ -100,18 +105,18 @@ SHOW VARIABLES LIKE 'gtid_binlog_pos'; --echo # Expected State post crash: --echo #================================================================= --echo # Master | Slave | ---echo # 0-2-6 (Not commited) | 0-2-6 (Received through semi-sync | +--echo # 0-1-6 (Not commited) | 0-1-6 (Received through semi-sync | --echo # | replication and applied) | --echo # 0-2-7 (Not commited) | 0-2-7 (Received through semi-sync | --echo # | replication and applied) | --echo #================================================================= ---let $log_search_pattern=truncated binlog file:.*slave.*000002 +--let $log_search_pattern=truncated binlog file:.*slave.*000002.* to remove transactions starting from GTID 0-1-6 --let $expected_rows_on_master= 3 --let $expected_rows_on_slave= 5 --source rpl_semi_sync_crash.inc --echo # ---echo # Server_1 promoted 
as master will send 0-2-6 and 0-2-7 to slave Server_2 +--echo # Server_1 promoted as master will send 0-1-6 and 0-2-7 to slave Server_2 --echo # --connection server_1 --let $rows_so_far=6 @@ -119,6 +124,7 @@ SHOW VARIABLES LIKE 'gtid_binlog_pos'; --save_master_pos --echo # The gtid state on current master must be equal to ... SHOW VARIABLES LIKE 'gtid_binlog_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_state'; SHOW VARIABLES LIKE 'gtid_slave_pos'; --connection server_2 @@ -127,6 +133,7 @@ SHOW VARIABLES LIKE 'gtid_slave_pos'; --echo # ... the gtid states on the slave: SHOW VARIABLES LIKE 'gtid_slave_pos'; SHOW VARIABLES LIKE 'gtid_binlog_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_state'; --let $diff_tables=server_1:t1, server_2:t1 --source include/diff_tables.inc @@ -154,7 +161,7 @@ SHOW VARIABLES LIKE 'gtid_binlog_pos'; --echo # 0-1-10 (Not commited - | | --echo # never sent to slave) | | --echo #================================================================= ---let $log_search_pattern=truncated binlog file:.*master.*000003 +--let $log_search_pattern=truncated binlog file:.*master.*000002.* to remove transactions starting from GTID 0-1-9 --let $expected_rows_on_master= 6 --let $expected_rows_on_slave= 7 --source rpl_semi_sync_crash.inc @@ -168,6 +175,7 @@ SHOW VARIABLES LIKE 'gtid_binlog_pos'; --source include/save_master_gtid.inc --echo # The gtid state on current master must be equal to ... SHOW VARIABLES LIKE 'gtid_binlog_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_state'; SHOW VARIABLES LIKE 'gtid_slave_pos'; --connection server_1 @@ -176,6 +184,7 @@ SHOW VARIABLES LIKE 'gtid_slave_pos'; --echo # ... 
the gtid states on the slave: SHOW VARIABLES LIKE 'gtid_slave_pos'; SHOW VARIABLES LIKE 'gtid_binlog_pos'; +SHOW VARIABLES LIKE 'gtid_binlog_state'; --echo # --echo # Cleanup diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 7b811a011ff64..a661a806ff6f3 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -1185,7 +1185,7 @@ COMMAND_LINE_ARGUMENT NULL VARIABLE_NAME GTID_STRICT_MODE VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Enforce strict seq_no ordering of events in the binary log. Slave stops with an error if it encounters an event that would cause it to generate an out-of-order binlog if executed. +VARIABLE_COMMENT Enforce strict seq_no ordering of events in the binary log. Slave stops with an error if it encounters an event that would cause it to generate an out-of-order binlog if executed. When ON the same server-id semisync-replicated transactions that duplicate exising ones in binlog are ignored without error and slave interruption. 
NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL diff --git a/sql/log.cc b/sql/log.cc index 81fe4b1e97db7..11dd979715c8c 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -6463,11 +6463,13 @@ MYSQL_BIN_LOG::bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no) bool MYSQL_BIN_LOG::check_strict_gtid_sequence(uint32 domain_id, uint32 server_id_arg, - uint64 seq_no) + uint64 seq_no, + bool no_error) { return rpl_global_gtid_binlog_state.check_strict_sequence(domain_id, server_id_arg, - seq_no); + seq_no, + no_error); } diff --git a/sql/log.h b/sql/log.h index 516fb36adb905..aec48263d844d 100644 --- a/sql/log.h +++ b/sql/log.h @@ -920,7 +920,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG bool lookup_domain_in_binlog_state(uint32 domain_id, rpl_gtid *out_gtid); int bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no); bool check_strict_gtid_sequence(uint32 domain_id, uint32 server_id, - uint64 seq_no); + uint64 seq_no, bool no_error= false); /** * used when opening new file, and binlog_end_pos moves backwards diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index eecc6040051e8..fce5f2606394f 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -1734,7 +1734,7 @@ rpl_binlog_state::alloc_element_nolock(const rpl_gtid *gtid) */ bool rpl_binlog_state::check_strict_sequence(uint32 domain_id, uint32 server_id, - uint64 seq_no) + uint64 seq_no, bool no_error) { element *elem; bool res= 0; @@ -1744,9 +1744,10 @@ rpl_binlog_state::check_strict_sequence(uint32 domain_id, uint32 server_id, (const uchar *)(&domain_id), 0)) && elem->last_gtid && elem->last_gtid->seq_no >= seq_no) { - my_error(ER_GTID_STRICT_OUT_OF_ORDER, MYF(0), domain_id, server_id, seq_no, - elem->last_gtid->domain_id, elem->last_gtid->server_id, - elem->last_gtid->seq_no); + if (!no_error) + my_error(ER_GTID_STRICT_OUT_OF_ORDER, MYF(0), domain_id, server_id, seq_no, + elem->last_gtid->domain_id, elem->last_gtid->server_id, + elem->last_gtid->seq_no); res= 1; } 
mysql_mutex_unlock(&LOCK_binlog_state); diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index 531d746763bc8..c8decff8fe857 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -317,7 +317,8 @@ struct rpl_binlog_state int update_with_next_gtid(uint32 domain_id, uint32 server_id, rpl_gtid *gtid); int alloc_element_nolock(const rpl_gtid *gtid); - bool check_strict_sequence(uint32 domain_id, uint32 server_id, uint64 seq_no); + bool check_strict_sequence(uint32 domain_id, uint32 server_id, uint64 seq_no, + bool no_error= false); int bump_seq_no_if_needed(uint32 domain_id, uint64 seq_no); int write_to_iocache(IO_CACHE *dest); int read_from_iocache(IO_CACHE *src); diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index cdd5697654945..8322bcd304294 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -43,7 +43,8 @@ Master_info::Master_info(LEX_CSTRING *connection_name_arg, gtid_reconnect_event_skip_count(0), gtid_event_seen(false), in_start_all_slaves(0), in_stop_all_slaves(0), in_flush_all_relay_logs(0), users(0), killed(0), - total_ddl_groups(0), total_non_trans_groups(0), total_trans_groups(0) + total_ddl_groups(0), total_non_trans_groups(0), total_trans_groups(0), + do_accept_own_server_id(false) { char *tmp; host[0] = 0; user[0] = 0; password[0] = 0; diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index ce2d3cc9ad50f..1377a816d482f 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -352,6 +352,20 @@ class Master_info : public Slave_reporting_capability ACK from slave, or if delay_master is enabled. */ int semi_ack; + /* + The flag has replicate_same_server_id semantics and is raised to accept + a same-server-id event group by the gtid strict mode semisync slave. + Own server-id events can normally appear as result of EITHER + A. 
this server semisync (failover to) slave crash-recovery: + the transaction was created on this server then being master, + got replicated elsewhere right before the crash before commit, + and finally at recovery the transaction gets evicted from the + server's binlog and its gtid (slave) state; OR + B. in a general circular configuration and then when a received (returned + to slave) gtid exists in the server's binlog. Then, in gtid strict mode, + it must be ignored similarly to the replicate-same-server-id rule. + */ + bool do_accept_own_server_id; }; int init_master_info(Master_info* mi, const char* master_info_fname, diff --git a/sql/slave.cc b/sql/slave.cc index 9321598d06db6..a0d725ca31887 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -5047,6 +5047,7 @@ log space"); mi->abort_slave= 0; mi->slave_running= MYSQL_SLAVE_NOT_RUN; mi->io_thd= 0; + mi->do_accept_own_server_id= false; /* Note: the order of the two following calls (first broadcast, then unlock) is important. Otherwise a killer_thread can execute between the calls and @@ -6185,15 +6186,6 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) uchar new_buf_arr[4096]; bool is_malloc = false; bool is_rows_event= false; - /* - The flag has replicate_same_server_id semantics and is raised to accept - a same-server-id event group by the gtid strict mode semisync slave. - Own server-id events can appear as result of this server crash-recovery: - the transaction was created on this server then being master, got replicated - elsewhere right before the crash before commit; - finally at recovery the transaction gets evicted from the server's binlog. 
- */ - bool do_accept_own_server_id; /* FD_q must have been prepared for the first R_a event inside get_master_version_and_clock() @@ -6783,6 +6775,19 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) ++mi->events_queued_since_last_gtid; inc_pos= event_len; + + /* + To compute `true` is normal for this *now* semisync slave server when + it has passed its crash-recovery as a former master. + */ + mi->do_accept_own_server_id= + (s_id == global_system_variables.server_id && + rpl_semi_sync_slave_enabled && opt_gtid_strict_mode && + mi->using_gtid != Master_info::USE_GTID_NO && + !mysql_bin_log.check_strict_gtid_sequence(event_gtid.domain_id, + event_gtid.server_id, + event_gtid.seq_no, + true)); // ...} eof else_likely } break; @@ -6965,10 +6970,6 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) break; } - do_accept_own_server_id= (s_id == global_system_variables.server_id - && rpl_semi_sync_slave_enabled && opt_gtid_strict_mode - && mi->using_gtid != Master_info::USE_GTID_NO); - /* Integrity of Rows- event group check. A sequence of Rows- events must end with STMT_END_F flagged one. 
@@ -7059,7 +7060,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) else if ((s_id == global_system_variables.server_id && !(mi->rli.replicate_same_server_id || - do_accept_own_server_id)) || + mi->do_accept_own_server_id)) || event_that_should_be_ignored(buf) || /* the following conjunction deals with IGNORE_SERVER_IDS, if set @@ -7119,7 +7120,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) } else { - if (do_accept_own_server_id) + if (mi->do_accept_own_server_id) { int2store(const_cast<uchar*>(buf + FLAGS_OFFSET), uint2korr(buf + FLAGS_OFFSET) | LOG_EVENT_ACCEPT_OWN_F); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 114714acfe6ac..b176d55134fab 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2030,7 +2030,10 @@ Sys_gtid_strict_mode( "gtid_strict_mode", "Enforce strict seq_no ordering of events in the binary log. Slave " "stops with an error if it encounters an event that would cause it to " - "generate an out-of-order binlog if executed.", + "generate an out-of-order binlog if executed. " + "When ON the same server-id semisync-replicated transactions that " + "duplicate exising ones in binlog are ignored without error " + "and slave interruption.", GLOBAL_VAR(opt_gtid_strict_mode), CMD_LINE(OPT_ARG), DEFAULT(FALSE));