Skip to content

Commit

Permalink
MDEV-28461 semisync-slave server recovery fails to rollback prepared …
Browse files Browse the repository at this point in the history
…transaction

that is not in binlog.

Post-crash recovery of --rpl-semi-sync-slave-enabled server
failed to recognize a transaction in-doubt that needed rolled back.
A prepared-but-not-in-binlog transaction gets committed instead
to possibly create inconsistency with a master (e.g the way it was observed
in the bug report).

The semisync recovery is corrected now with initializing binlog coordinates
of any transaction in-doubt to the maximum offset which is
unreachable.
In effect when a prepared transaction that is not found in binlog
it will be decided to rollback because it's guaranteed to reside
in a truncated tail area of binlog.

Mtr tests are reinforced to cover the described scenario.
  • Loading branch information
andrelkin committed May 13, 2022
1 parent c1063a1 commit da18a5d
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 25 deletions.
87 changes: 71 additions & 16 deletions mysql-test/suite/binlog/r/binlog_truncate_active_log.result
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ RESET MASTER;
SET @@global.sync_binlog=1;
CREATE TABLE t (f INT) ENGINE=INNODB;
CREATE TABLE t2 (f INT) ENGINE=INNODB;
CREATE TABLE t4 (f INT) ENGINE=INNODB;
CREATE TABLE tm (f INT) ENGINE=Aria;
# Case A.
connect master1,localhost,root,,;
connect master2,localhost,root,,;
connect master3,localhost,root,,;
connect master4,localhost,root,,;
connection default;
INSERT INTO t VALUES (10);
INSERT INTO tm VALUES (10);
Expand All @@ -23,14 +25,23 @@ connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';
Before the crash
0-1-7
0-1-8
connection master4;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
INSERT INTO t4 VALUES (13);
connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
Before the crash and never logged trx
0-1-8
connection default;
# Kill the server
disconnect master1;
disconnect master2;
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-6/ in mysqld.1.err
disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-7/ in mysqld.1.err
Pre-crash binlog file content:
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
Expand All @@ -39,6 +50,8 @@ master-bin.000001 # Query # # use `test`; CREATE TABLE t (f INT) ENGINE=INNODB
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; CREATE TABLE t2 (f INT) ENGINE=INNODB
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; CREATE TABLE t4 (f INT) ENGINE=INNODB
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; CREATE TABLE tm (f INT) ENGINE=Aria
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test`; INSERT INTO t VALUES (10)
Expand All @@ -48,16 +61,20 @@ master-bin.000001 # Query # # use `test`; INSERT INTO tm VALUES (10)
master-bin.000001 # Query # # COMMIT
SELECT @@global.gtid_binlog_pos as 'After the crash';
After the crash
0-1-5
0-1-6
"One row should be present in table 't'"
SELECT * FROM t;
f
10
"No row should be present in table 't4'"
SELECT * FROM t4;
f
DELETE FROM t;
# Case B.
connect master1,localhost,root,,;
connect master2,localhost,root,,;
connect master3,localhost,root,,;
connect master4,localhost,root,,;
connection default;
INSERT INTO t VALUES (10);
INSERT INTO tm VALUES (10);
Expand All @@ -72,14 +89,23 @@ connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';
Before the crash
0-1-10
0-1-11
connection master4;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
INSERT INTO t4 VALUES (13);
connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
Before the crash and never logged trx
0-1-11
connection default;
# Kill the server
disconnect master1;
disconnect master2;
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-10/ in mysqld.1.err
disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-11/ in mysqld.1.err
Pre-crash binlog file content:
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
Expand All @@ -97,11 +123,14 @@ master-bin.000002 # Query # # use `test`; DELETE FROM t2 WHERE f = 0
master-bin.000002 # Query # # COMMIT
SELECT @@global.gtid_binlog_pos as 'After the crash';
After the crash
0-1-9
0-1-10
"One row should be present in table 't'"
SELECT * FROM t;
f
10
"No row should be present in table 't4'"
SELECT * FROM t4;
f
DELETE FROM t;
# Case C.
CREATE PROCEDURE sp_blank_xa()
Expand All @@ -114,6 +143,7 @@ END|
connect master1,localhost,root,,;
connect master2,localhost,root,,;
connect master3,localhost,root,,;
connect master4,localhost,root,,;
connection default;
INSERT INTO t VALUES (10);
INSERT INTO tm VALUES (10);
Expand All @@ -128,14 +158,23 @@ connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';
Before the crash
0-1-15
0-1-16
connection master4;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
INSERT INTO t4 VALUES (13);
connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
Before the crash and never logged trx
0-1-16
connection default;
# Kill the server
disconnect master1;
disconnect master2;
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-14/ in mysqld.1.err
disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-15/ in mysqld.1.err
Pre-crash binlog file content:
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
Expand All @@ -158,11 +197,14 @@ master-bin.000003 # Query # # use `test`; INSERT INTO tm VALUES (10)
master-bin.000003 # Query # # COMMIT
SELECT @@global.gtid_binlog_pos as 'After the crash';
After the crash
0-1-13
0-1-14
"One row should be present in table 't'"
SELECT * FROM t;
f
10
"No row should be present in table 't4'"
SELECT * FROM t4;
f
DELETE FROM t;
DROP PROCEDURE sp_blank_xa;
# Case D.
Expand All @@ -176,6 +218,7 @@ END|
connect master1,localhost,root,,;
connect master2,localhost,root,,;
connect master3,localhost,root,,;
connect master4,localhost,root,,;
connection default;
INSERT INTO t VALUES (10);
INSERT INTO tm VALUES (10);
Expand All @@ -190,14 +233,23 @@ connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';
Before the crash
0-1-20
0-1-21
connection master4;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
INSERT INTO t4 VALUES (13);
connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
Before the crash and never logged trx
0-1-21
connection default;
# Kill the server
disconnect master1;
disconnect master2;
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-20/ in mysqld.1.err
disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-21/ in mysqld.1.err
Pre-crash binlog file content:
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
Expand Down Expand Up @@ -226,11 +278,14 @@ master-bin.000004 # Query # # XA END X'786964',X'',1
master-bin.000004 # XA_prepare # # XA PREPARE X'786964',X'',1
SELECT @@global.gtid_binlog_pos as 'After the crash';
After the crash
0-1-19
0-1-20
"One row should be present in table 't'"
SELECT * FROM t;
f
10
"No row should be present in table 't4'"
SELECT * FROM t4;
f
DELETE FROM t;
DROP PROCEDURE sp_xa;
# Cleanup
Expand Down
8 changes: 7 additions & 1 deletion mysql-test/suite/binlog/r/binlog_truncate_multi_log.result
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT
INSERT INTO ti VALUES (3, "not gonna survive");
connection default;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
connect master3,localhost,root,,;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master3_ready WAIT_FOR master3_go_never_arrives";
INSERT INTO ti VALUES (4, "not gonna be log therefore survive"),(5, "ditto");
connection default;
SET DEBUG_SYNC= "now WAIT_FOR master3_ready";
"List of binary logs before crash"
show binary logs;
Log_name File_size
Expand All @@ -36,7 +41,8 @@ connection default;
# Kill the server
disconnect master1;
disconnect master2;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /truncated binlog file:.*master.*000002/ in mysqld.1.err
"One record should be present in table"
SELECT * FROM ti;
Expand Down
15 changes: 14 additions & 1 deletion mysql-test/suite/binlog/t/binlog_truncate_active_log.inc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
connect(master1,localhost,root,,);
connect(master2,localhost,root,,);
connect(master3,localhost,root,,);
connect(master4,localhost,root,,);

--connection default

Expand All @@ -22,16 +23,26 @@ SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready";
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';

--connection master4
# Simulate prepared & not-logged trx; it will never recover.
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
--send INSERT INTO t4 VALUES (13)

--connection master3
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';

--connection default
--source include/kill_mysqld.inc
--disconnect master1
--disconnect master2
--disconnect master3
--disconnect master4

#
# Server restart
#
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
--source include/start_mysqld.inc

# Check error log for a successful truncate message.
Expand All @@ -49,6 +60,8 @@ SELECT @@global.gtid_binlog_pos as 'Before the crash';
SELECT @@global.gtid_binlog_pos as 'After the crash';
--echo "One row should be present in table 't'"
SELECT * FROM t;
--echo "No row should be present in table 't4'"
SELECT * FROM t4;

# prepare binlog file index for the next test
--inc $binlog_file_index
Expand Down
9 changes: 5 additions & 4 deletions mysql-test/suite/binlog/t/binlog_truncate_active_log.test
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ RESET MASTER;
SET @@global.sync_binlog=1;
CREATE TABLE t (f INT) ENGINE=INNODB;
CREATE TABLE t2 (f INT) ENGINE=INNODB;
CREATE TABLE t4 (f INT) ENGINE=INNODB;
CREATE TABLE tm (f INT) ENGINE=Aria;

# Old (pre-crash) binlog file index initial value.
Expand All @@ -42,14 +43,14 @@ CREATE TABLE tm (f INT) ENGINE=Aria;
# 'query1' onwards will be removed from the binary log.
# Show-binlog-events is to prove that.

--let $truncate_gtid_pos = 0-1-6
--let $truncate_gtid_pos = 0-1-7
--let $query1 = INSERT INTO t VALUES (20)
--let $query2 = DELETE FROM t2 WHERE f = 0 /* no such record */
--source binlog_truncate_active_log.inc

--echo # Case B.
# The inverted sequence ends up to truncate starting from $query2
--let $truncate_gtid_pos = 0-1-10
--let $truncate_gtid_pos = 0-1-11
--let $query1 = DELETE FROM t2 WHERE f = 0
--let $query2 = INSERT INTO t VALUES (20)
--source binlog_truncate_active_log.inc
Expand All @@ -68,7 +69,7 @@ delimiter ;|

# The same as in A with $query2 being the zero-engine XA transaction.
# Both $query1 and $query2 are going to be truncated.
--let $truncate_gtid_pos = 0-1-14
--let $truncate_gtid_pos = 0-1-15
--let $query1 = INSERT INTO t VALUES (20)
--let $query2 = CALL sp_blank_xa
--source binlog_truncate_active_log.inc
Expand All @@ -89,7 +90,7 @@ delimiter ;|

# The same as in B with $query1 being the prepared XA transaction.
# Truncation must occurs at $query2.
--let $truncate_gtid_pos = 0-1-20
--let $truncate_gtid_pos = 0-1-21
--let $query1 = CALL sp_xa
--let $query2 = INSERT INTO t2 VALUES (20)
--source binlog_truncate_active_log.inc
Expand Down
12 changes: 11 additions & 1 deletion mysql-test/suite/binlog/t/binlog_truncate_multi_log.test
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT

--connection default
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";

connect(master3,localhost,root,,);
# The 3nd trx for recovery, it won't get into binlog nor therefore recover
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master3_ready WAIT_FOR master3_go_never_arrives";
--send INSERT INTO ti VALUES (4, "not gonna be log therefore survive"),(5, "ditto")

--connection default
SET DEBUG_SYNC= "now WAIT_FOR master3_ready";

--echo "List of binary logs before crash"
--source include/show_binary_logs.inc
--echo # The gtid binlog state prior the crash will be truncated at the end of the test
Expand All @@ -49,11 +58,12 @@ SELECT @@global.gtid_binlog_state;
--source include/kill_mysqld.inc
--disconnect master1
--disconnect master2
--disconnect master3

#
# Server restart
#
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
--source include/start_mysqld.inc

# Check error log for a successful truncate message.
Expand Down
13 changes: 11 additions & 2 deletions sql/handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,7 @@ typedef struct xid_t XID;
*/
typedef uint Binlog_file_id;
const Binlog_file_id MAX_binlog_id= UINT_MAX;
const my_off_t MAX_off_t = (~(my_off_t) 0);
/*
Compound binlog-id and byte offset of transaction's first event
in a sequence (e.g the recovery sequence) of binlog files.
Expand All @@ -977,14 +978,22 @@ struct xid_recovery_member
my_xid xid;
uint in_engine_prepare; // number of engines that have xid prepared
bool decided_to_commit;
Binlog_offset binlog_coord; // semisync recovery binlog offset
/*
Semisync recovery binlog offset. It's initialized with the maximum
unreachable offset. The max value will remain for any transaction
not found in binlog to yield its rollback decision as it's guaranteed
to be within a truncated tail part of the binlog.
*/
Binlog_offset binlog_coord;
XID *full_xid; // needed by wsrep or past it recovery
decltype(::server_id) server_id; // server id of orginal server

xid_recovery_member(my_xid xid_arg, uint prepare_arg, bool decided_arg,
XID *full_xid_arg, decltype(::server_id) server_id_arg)
: xid(xid_arg), in_engine_prepare(prepare_arg),
decided_to_commit(decided_arg), full_xid(full_xid_arg) , server_id(server_id_arg) {};
decided_to_commit(decided_arg),
binlog_coord(Binlog_offset(MAX_binlog_id, MAX_off_t)),
full_xid(full_xid_arg), server_id(server_id_arg) {};
};

/* for recover() handlerton call */
Expand Down

0 comments on commit da18a5d

Please sign in to comment.