Skip to content

Commit

Permalink
MDEV-25958: rpl_semi_sync_fail_over.test fails in buildbot
Browse files Browse the repository at this point in the history
Analysis:
========
In case multi binlog truncation scenario debug sync points are in the
following order.

Two inserts are done on master as shown below.

INSERT INTO t1 VALUES (4, REPEAT("x", 4100)
commit_after_release_LOCK_after_binlog_sync

INSERT INTO t1 VALUES (5, REPEAT("x", 4100)
commit_after_release_LOCK_log

First insert debug sync ensures that transaction is synced to binlog and
not committed but it reached slave through semi sync.

Second insert debug sync ensures that transaction is synced to binlog and
not committed. It doesn't ensure that 'INSERT 5' reached slave.

Most of the times INSERT 5 reaches slave, hence when it is promoted as
master it sends 4,5 to slave. But occasionally 5 may not reach slave in
those cases post recovery master will have only 4. When row 6 is inserted
Master has 4-6 and Slave has 4,5,6.

This results in test failure.

Fix:
===
For the first insert use 'commit_before_get_LOCK_commit_ordered' debug sync
point, it will ensure that binlog was sent to slave and slave has
acknowledged the receipt. Now enable debug code such that when the next
transaction is written to binary log, dump thread will read and send it
across the network and notify the server to be get killed. Insert row 5
and wait for notification from dump thread. Kill the server. This ensures
that both 4 and 5 have reached the semi-sync slave.

Added a new test case:
Insert two rows on master such that first is present in master's binlog and
reached semi sync slave. Second insert should be flushed to binlog but not
sent to slave. Now crash and fail over to slave. The promoted master will send
the extra transaction to slave.
  • Loading branch information
sujatha-s committed Aug 19, 2021
1 parent 583516b commit 475f69b
Show file tree
Hide file tree
Showing 6 changed files with 320 additions and 83 deletions.
178 changes: 152 additions & 26 deletions mysql-test/suite/rpl/r/rpl_semi_sync_fail_over.result
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,41 @@ connection server_1;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
set @@global.rpl_semi_sync_master_enabled = 1;
set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC;
call mtr.add_suppression("Can.t init tc log");
call mtr.add_suppression("Aborting");
call mtr.add_suppression("1 client is using or hasn.t closed the table properly");
call mtr.add_suppression("Table './mtr/test_suppressions' is marked as crashed and should be repaired");
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
INSERT INTO t1 VALUES (1, 'dummy1');
connection server_2;
connection server_1;
#
# Case:1
#
# CRASH the original master, and FAILOVER to the new
# INSERT INTO t1 VALUES (2, REPEAT("x", 4100))
# Row - 2 will be in master's binlog but not committed, gets replicated
# to slave and applied. On crash master should have 1 row and slave
# should have 2 rows.
#
# Expected State post crash:
#=================================================================
# Master | Slave |
# 0-1-4 (Not committed) | 0-1-4 (Received through semi-sync |
# | replication and applied) |
#=================================================================
connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,;
SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
connection server_1;
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
# Kill the server
connection server_2;
include/wait_for_slave_param.inc [Slave_SQL_Running_State]
include/stop_slave.inc
include/assert.inc [Table t1 should have 2 rows.]
SELECT @@GLOBAL.gtid_current_pos;
@@GLOBAL.gtid_current_pos
0-1-8
# restart: --rpl-semi-sync-slave-enabled=1
0-1-4
# restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1
connection server_1;
include/assert.inc [Table t1 should have 1 rows.]
FOUND 1 /truncated binlog file:.*master.*000001/ in mysqld.1.err
disconnect conn_client;
connection server_2;
Expand All @@ -45,42 +61,69 @@ CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_u
set global rpl_semi_sync_slave_enabled = 1;
set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
include/start_slave.inc
#
# Server_2 promoted as master will send 0-1-4 to new slave Server_1
#
connection server_2;
INSERT INTO t1 VALUES (3, 'dummy3');
# The gtid state on current master must be equal to ...
SHOW VARIABLES LIKE 'gtid_binlog_pos';
Variable_name Value
gtid_binlog_pos 0-2-9
gtid_binlog_pos 0-2-5
SHOW VARIABLES LIKE 'gtid_slave_pos';
Variable_name Value
gtid_slave_pos 0-1-4
connection server_1;
SELECT COUNT(*) = 3 as 'true' FROM t1;
true
1
# ... the gtid states on the slave:
SHOW VARIABLES LIKE 'gtid_slave_pos';
Variable_name Value
gtid_slave_pos 0-2-9
gtid_slave_pos 0-2-5
SHOW VARIABLES LIKE 'gtid_binlog_pos';
Variable_name Value
gtid_binlog_pos 0-2-9
gtid_binlog_pos 0-2-5
connection server_2;
#
# Case:2
#
# CRASH the new master, and FAILOVER back to the original
# INSERT INTO t1 VALUES (4, REPEAT("x", 4100))
# INSERT INTO t1 VALUES (5, REPEAT("x", 4100))
# Rows 4 and 5 will be in master's binlog but not committed, they get
# replicated to slave and applied. On crash master should have 3 rows
# and slave should have 5 rows.
#
# Expected State post crash:
#=================================================================
# Master | Slave |
# 0-2-6 (Not commited) | 0-2-6 (Received through semi-sync |
# | replication and applied) |
# 0-2-7 (Not commited) | 0-2-7 (Received through semi-sync |
# | replication and applied) |
#=================================================================
connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_2,;
SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
INSERT INTO t1 VALUES (4, REPEAT("x", 4100));
connect conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,;
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go";
SET GLOBAL debug_dbug="d,Notify_binlog_EOF";
INSERT INTO t1 VALUES (5, REPEAT("x", 4100));
connection server_2;
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "now WAIT_FOR eof_reached";
# Kill the server
connection server_1;
include/wait_for_slave_param.inc [Slave_SQL_Running_State]
include/stop_slave.inc
include/assert.inc [Table t1 should have 5 rows.]
SELECT @@GLOBAL.gtid_current_pos;
@@GLOBAL.gtid_current_pos
0-2-11
# restart: --rpl-semi-sync-slave-enabled=1
0-2-7
# restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1
connection server_2;
NOT FOUND /truncated binlog file:.*slave.*000001/ in mysqld.2.err
include/assert.inc [Table t1 should have 3 rows.]
FOUND 1 /truncated binlog file:.*slave.*000002/ in mysqld.2.err
disconnect conn_client;
connection server_1;
set global rpl_semi_sync_master_enabled = 1;
Expand All @@ -90,40 +133,123 @@ CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_u
set global rpl_semi_sync_slave_enabled = 1;
set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
include/start_slave.inc
#
# Server_1 promoted as master will send 0-2-6 and 0-2-7 to slave Server_2
#
connection server_1;
INSERT INTO t1 VALUES (6, 'Done');
INSERT INTO t1 VALUES (6, 'dummy6');
# The gtid state on current master must be equal to ...
SHOW VARIABLES LIKE 'gtid_binlog_pos';
Variable_name Value
gtid_binlog_pos 0-1-12
gtid_binlog_pos 0-1-8
SHOW VARIABLES LIKE 'gtid_slave_pos';
Variable_name Value
gtid_slave_pos 0-2-7
connection server_2;
SELECT COUNT(*) = 6 as 'true' FROM t1;
true
1
# ... the gtid states on the slave:
SHOW VARIABLES LIKE 'gtid_slave_pos';
Variable_name Value
gtid_slave_pos 0-1-12
gtid_slave_pos 0-1-8
SHOW VARIABLES LIKE 'gtid_binlog_pos';
Variable_name Value
gtid_binlog_pos 0-1-12
gtid_binlog_pos 0-1-8
include/diff_tables.inc [server_1:t1, server_2:t1]
# Cleanup
connection server_1;
DROP TABLE t1;
#
# Case:3
#
# CRASH the master and FAILOVER to slave
# INSERT INTO t1 VALUES (7, REPEAT("x", 4100))
# INSERT INTO t1 VALUES (8, REPEAT("x", 4100))
# Rows 7 and 8 will be in master's binlog but not committed, only 7
# gets replicated to slave and applied. On crash master should have 6
# rows and slave should have 7 rows.
#
# Expected State post crash:
#=================================================================
# Master | Slave |
# 0-1-9 (Not commited) | 0-1-9 (Received through semi-sync |
# | replication and applied) |
# 0-1-10 (Not commited - | |
# never sent to slave) | |
#=================================================================
connect conn_client,127.0.0.1,root,,test,$SERVER_MYPORT_1,;
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
INSERT INTO t1 VALUES (7, REPEAT("x", 4100));
connect conn_client_3,127.0.0.1,root,,test,$SERVER_MYPORT_1,;
SET DEBUG_SYNC= "commit_before_update_binlog_end_pos SIGNAL con3_ready WAIT_FOR con1_go";
INSERT INTO t1 VALUES (8, REPEAT("x", 4100));
connection server_1;
SET DEBUG_SYNC= "now WAIT_FOR con3_ready";
# Kill the server
connection server_2;
include/wait_for_slave_param.inc [Slave_SQL_Running_State]
include/stop_slave.inc
include/assert.inc [Table t1 should have 7 rows.]
SELECT @@GLOBAL.gtid_current_pos;
@@GLOBAL.gtid_current_pos
0-1-9
# restart: --skip-slave-start=1 --rpl-semi-sync-slave-enabled=1
connection server_1;
set @@global.rpl_semi_sync_master_enabled = 0;
set @@global.rpl_semi_sync_slave_enabled = 0;
set @@global.rpl_semi_sync_master_wait_point=default;
RESET SLAVE;
include/assert.inc [Table t1 should have 6 rows.]
NOT FOUND /truncated binlog file:.*master.*000003/ in mysqld.1.err
disconnect conn_client;
connection server_2;
set global rpl_semi_sync_master_enabled = 1;
set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
connection server_1;
CHANGE MASTER TO master_host='127.0.0.1', master_port=$new_master_port, master_user='root', master_use_gtid=SLAVE_POS;
set global rpl_semi_sync_slave_enabled = 1;
set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
include/start_slave.inc
#
# Server_2 promoted as master will send 0-1-9 to slave Server_1
#
connection server_2;
INSERT INTO t1 VALUES (8, 'Done');
include/save_master_gtid.inc
# The gtid state on current master must be equal to ...
SHOW VARIABLES LIKE 'gtid_binlog_pos';
Variable_name Value
gtid_binlog_pos 0-2-10
SHOW VARIABLES LIKE 'gtid_slave_pos';
Variable_name Value
gtid_slave_pos 0-1-9
connection server_1;
include/sync_with_master_gtid.inc
SELECT COUNT(*) = 8 as 'true' FROM t1;
true
1
# ... the gtid states on the slave:
SHOW VARIABLES LIKE 'gtid_slave_pos';
Variable_name Value
gtid_slave_pos 0-2-10
SHOW VARIABLES LIKE 'gtid_binlog_pos';
Variable_name Value
gtid_binlog_pos 0-2-10
#
# Cleanup
#
include/stop_slave.inc
set global rpl_semi_sync_slave_enabled = 0;
set global rpl_semi_sync_master_enabled = 0;
set global rpl_semi_sync_master_wait_point=default;
RESET MASTER;
RESET SLAVE;
connection server_2;
RESET MASTER;
RESET SLAVE;
set @@global.rpl_semi_sync_master_enabled = 0;
set @@global.rpl_semi_sync_slave_enabled = 0;
set @@global.rpl_semi_sync_master_wait_point=default;
CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no;
CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=SLAVE_POS;
set @@global.gtid_slave_pos=@@global.gtid_binlog_pos;
include/start_slave.inc
connection server_1;
DROP TABLE t1;
connection server_2;
connection default;
include/rpl_end.inc
52 changes: 43 additions & 9 deletions mysql-test/suite/rpl/t/rpl_semi_sync_crash.inc
Original file line number Diff line number Diff line change
Expand Up @@ -20,36 +20,70 @@ if (!$failover_to_slave)

# Hold insert after write to binlog and before "run_commit_ordered" in engine

SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
--send_eval $query_to_crash

if ($case == 1)
{
SET DEBUG_SYNC= "commit_after_release_LOCK_after_binlog_sync SIGNAL con1_ready WAIT_FOR con1_go";
--send_eval $query_to_crash
--connection server_$server_to_crash
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
--source include/kill_mysqld.inc
}

# complicate recovery with an extra binlog file
if (!$failover_to_slave)
if ($case == 2)
{
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
--send_eval $query_to_crash
--connect (conn_client_2,127.0.0.1,root,,test,$SERVER_MYPORT_2,)
# use the same signal with $query_to_crash
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con2_go";
SET GLOBAL debug_dbug="d,Notify_binlog_EOF";
--send_eval $query2_to_crash
--connection server_$server_to_crash
SET DEBUG_SYNC= "now WAIT_FOR eof_reached";
--source include/kill_mysqld.inc
}

--connection server_$server_to_crash
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
--source include/kill_mysqld.inc
# complicate recovery with an extra binlog file
if ($case == 3)
{
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
--send_eval $query_to_crash
--connect (conn_client_3,127.0.0.1,root,,test,$SERVER_MYPORT_1,)
# use the same signal with $query_to_crash
SET DEBUG_SYNC= "commit_before_update_binlog_end_pos SIGNAL con3_ready WAIT_FOR con1_go";
--send_eval $query2_to_crash
--connection server_$server_to_crash
SET DEBUG_SYNC= "now WAIT_FOR con3_ready";
--source include/kill_mysqld.inc
}

--connection server_$server_to_promote
--let $slave_param= Slave_SQL_Running_State
--let $slave_param_value= Slave has read all relay log; waiting for more updates
source include/wait_for_slave_param.inc;

--error 2003
--source include/stop_slave.inc

--let $assert_cond= COUNT(*) = $expected_rows_on_slave FROM t1
--let $assert_text= Table t1 should have $expected_rows_on_slave rows.
--source include/assert.inc

SELECT @@GLOBAL.gtid_current_pos;

--let $restart_parameters=--rpl-semi-sync-slave-enabled=1
--let $restart_parameters=--skip-slave-start=1 --rpl-semi-sync-slave-enabled=1
--let $allow_rpl_inited=1
--source include/start_mysqld.inc

--connection server_$server_to_crash
--enable_reconnect
--source include/wait_until_connected_again.inc

--let $assert_cond= COUNT(*) = $expected_rows_on_master FROM t1
--let $assert_text= Table t1 should have $expected_rows_on_master rows.
--source include/assert.inc

# Check error log for correct messages.
let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.$server_to_crash.err;
--let SEARCH_FILE=$log_error_
Expand Down
2 changes: 2 additions & 0 deletions mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.cnf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
[mysqld.1]
log-slave-updates
gtid-strict-mode=1
sync-binlog=1

[mysqld.2]
log-slave-updates
gtid-strict-mode=1
sync-binlog=1
Loading

0 comments on commit 475f69b

Please sign in to comment.