Skip to content

Commit 2a7dd64

Browse files
committed
MDEV-24526 binlog rotate via FLUSH LOGS may obsolate binlog file for recovery too eary
There was race between a committing transaction and the following in binlog order FLUSH LOGS that could create a 2nd Binlog checkpoint (BCP) event in the new file *before* the first logged-in-old-binlog transaction gets committed in Innodb. That would cause the transaction loss at recovery, should the server stop right after the BCP. The race is tackled by enforcing the necessary set of mutexes to be acquired by FLUSH-LOGS handler in the correct order (of the group commit leader pattern). Note, there remain two cases where a similar race is still possible: - the above race as it is when the server is run with ("unlikely") non-default `--binlog-optimize-thread-scheduling=0` (MDEV-24530), and - at unlikely event of bin-logging of Incident event (MDEV-24531) that also triggers binlog rotation, in both cases though with lesser chances after the current fixes.
1 parent ef2749c commit 2a7dd64

File tree

3 files changed

+149
-0
lines changed

3 files changed

+149
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
2+
RESET MASTER;
3+
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
4+
*** Test that FLUSH LOGS waits if a transaction ordered commit is in progress.
5+
connect con1,localhost,root,,;
6+
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
7+
INSERT INTO t1 VALUES (1, REPEAT("x", 1));
8+
connection default;
9+
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
10+
SET DEBUG_SYNC= "rotate_after_rotate SIGNAL con_flush_ready WAIT_FOR default_go";
11+
FLUSH LOGS;
12+
connect con2,localhost,root,,;
13+
Trx_1 is not yet committed:
14+
SELECT count(*) as 'ZERO' from t1;
15+
ZERO
16+
0
17+
Wait for Trx_2 has rotated binlog:
18+
SET DEBUG_SYNC= "now WAIT_FOR con_flush_ready";
19+
SET DEBUG_SYNC= "now SIGNAL default_go";
20+
connection default;
21+
Must be tree logs in the list:
22+
show binary logs;
23+
Log_name File_size
24+
master-bin.000001 #
25+
master-bin.000002 #
26+
master-bin.000003 #
27+
include/show_binlog_events.inc
28+
Log_name Pos Event_type Server_id End_log_pos Info
29+
master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
30+
master-bin.000001 # Gtid_list # # []
31+
master-bin.000001 # Binlog_checkpoint # # master-bin.000001
32+
master-bin.000001 # Gtid # # GTID #-#-#
33+
master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb
34+
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
35+
master-bin.000001 # Annotate_rows # # INSERT INTO t1 VALUES (1, REPEAT("x", 1))
36+
master-bin.000001 # Table_map # # table_id: # (test.t1)
37+
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
38+
master-bin.000001 # Xid # # COMMIT /* XID */
39+
master-bin.000001 # Rotate # # master-bin.000002;pos=POS
40+
Only one Binlog checkpoint must exist and point to master-bin.000001
41+
include/show_binlog_events.inc
42+
Log_name Pos Event_type Server_id End_log_pos Info
43+
master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
44+
master-bin.000002 # Gtid_list # # [#-#-#]
45+
master-bin.000002 # Binlog_checkpoint # # master-bin.000001
46+
SELECT count(*) as 'ONE' from t1;
47+
ONE
48+
1
49+
connection default;
50+
DROP TABLE t1;
51+
SET debug_sync = 'reset';
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
--source include/have_innodb.inc
2+
--source include/have_debug.inc
3+
--source include/have_debug_sync.inc
4+
--source include/have_binlog_format_row.inc
5+
6+
# References:
7+
#
8+
# MDEV-24526 binlog rotate via FLUSH LOGS may obsolate binlog file too eary
9+
#
10+
# The test for MDEV-24526 proves the fixes correct observed race condition
11+
# between a commiting transaction and FLUSH-LOGS.
12+
# The plot.
13+
# Trx_1 (con1) transaction binlogs first
14+
# to yield its turn acquiring LOCK_commit_ordered to Trx_2 and stand
15+
# still waiting of a signal that will never arrive.
16+
# Trx_2 can't acquire it in the fixed version even though
17+
# Trx_3 makes sure Trx_2 has reached a post-rotation execution point
18+
# to signal it to proceed.
19+
# Then the server gets crashed and Trx_1 must recover unlike
20+
# in the OLD buggy version.
21+
#
22+
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
23+
RESET MASTER;
24+
25+
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
26+
27+
--echo *** Test that FLUSH LOGS waits if a transaction ordered commit is in progress.
28+
29+
connect(con1,localhost,root,,); # Trx_1
30+
# hang before doing acquiring Commit Ordered mutex
31+
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
32+
33+
--send INSERT INTO t1 VALUES (1, REPEAT("x", 1))
34+
35+
connection default; # Trx_2
36+
37+
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
38+
SET DEBUG_SYNC= "rotate_after_rotate SIGNAL con_flush_ready WAIT_FOR default_go";
39+
--send FLUSH LOGS
40+
41+
connect(con2,localhost,root,,); # Trx_3
42+
--echo Trx_1 is not yet committed:
43+
SELECT count(*) as 'ZERO' from t1;
44+
45+
--echo Wait for Trx_2 has rotated binlog:
46+
SET DEBUG_SYNC= "now WAIT_FOR con_flush_ready";
47+
# Useless signal to prove Trx_2 cannot race Trx_1's commit
48+
# even though Trx_1 never received the being waited 'con1_go'.
49+
SET DEBUG_SYNC= "now SIGNAL default_go";
50+
51+
--let $shutdown_timeout=0
52+
--source include/restart_mysqld.inc
53+
54+
connection default;
55+
--enable_reconnect
56+
--error 0,2013
57+
--reap
58+
59+
--echo Must be tree logs in the list:
60+
--source include/show_binary_logs.inc
61+
--let $binlog_file= master-bin.000001
62+
--let $binlog_start= 4
63+
--source include/show_binlog_events.inc
64+
65+
--echo Only one Binlog checkpoint must exist and point to master-bin.000001
66+
--let $binlog_file= master-bin.000002
67+
--let $binlog_start= 4
68+
--source include/show_binlog_events.inc
69+
70+
71+
# In the buggy server version the following select may have
72+
# resulted with ZERO:
73+
SELECT count(*) as 'ONE' from t1;
74+
75+
# Clean up.
76+
connection default;
77+
78+
DROP TABLE t1;
79+
SET debug_sync = 'reset';

sql/log.cc

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6851,6 +6851,9 @@ int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
68516851
bool check_purge= false;
68526852

68536853
mysql_mutex_lock(&LOCK_log);
6854+
6855+
DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");
6856+
68546857
prev_binlog_id= current_binlog_id;
68556858

68566859
if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
@@ -6861,11 +6864,22 @@ int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
68616864
}
68626865
else if ((error= rotate(force_rotate, &check_purge)))
68636866
check_purge= false;
6867+
6868+
DEBUG_SYNC(current_thd, "rotate_after_rotate");
6869+
68646870
/*
68656871
NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
68666872
the mutex. Otherwise causes various deadlocks.
6873+
Explicit binlog rotation must be synchronized with a concurrent
6874+
binlog ordered commit, in particular not let binlog
6875+
checkpoint notification request until early binlogged
6876+
concurrent commits have has been completed.
68676877
*/
6878+
mysql_mutex_lock(&LOCK_after_binlog_sync);
68686879
mysql_mutex_unlock(&LOCK_log);
6880+
mysql_mutex_lock(&LOCK_commit_ordered);
6881+
mysql_mutex_unlock(&LOCK_after_binlog_sync);
6882+
mysql_mutex_unlock(&LOCK_commit_ordered);
68696883

68706884
if (check_purge)
68716885
checkpoint_and_purge(prev_binlog_id);
@@ -8014,7 +8028,12 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
80148028
}
80158029

80168030
DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
8031+
80178032
mysql_mutex_lock(&LOCK_commit_ordered);
8033+
DBUG_EXECUTE_IF("crash_before_engine_commit",
8034+
{
8035+
DBUG_SUICIDE();
8036+
});
80188037
last_commit_pos_offset= commit_offset;
80198038

80208039
/*

0 commit comments

Comments
 (0)