Skip to content

Commit

Permalink
MDEV-24526 binlog rotate via FLUSH LOGS may obsolate binlog file for …
Browse files Browse the repository at this point in the history
…recovery too eary

There was race between a committing transaction and the following in binlog
order FLUSH LOGS that could create a 2nd Binlog checkpoint (BCP) event
in the new file *before* the first logged-in-old-binlog transaction gets committed in
Innodb. That would cause the transaction loss at recovery, should
the server stop right after the BCP.

The race is tackled by enforcing the necessary set of mutexes to be acquired
by FLUSH-LOGS handler in the correct order (of the group commit leader
pattern).

Note, there remain two cases where a similar race is still possible:
  - the above race as it is when the server is run with ("unlikely")
    non-default `--binlog-optimize-thread-scheduling=0` (MDEV-24530), and
  - at unlikely event of bin-logging of Incident event (MDEV-24531) that
    also triggers binlog rotation,
    in both cases though with lesser chances after the current fixes.
  • Loading branch information
andrelkin committed Apr 21, 2021
1 parent ef2749c commit 2a7dd64
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 0 deletions.
51 changes: 51 additions & 0 deletions mysql-test/suite/binlog/r/binlog_checkpoint_flush_logs.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
*** Test that FLUSH LOGS waits if a transaction ordered commit is in progress.
connect con1,localhost,root,,;
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
INSERT INTO t1 VALUES (1, REPEAT("x", 1));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "rotate_after_rotate SIGNAL con_flush_ready WAIT_FOR default_go";
FLUSH LOGS;
connect con2,localhost,root,,;
Trx_1 is not yet committed:
SELECT count(*) as 'ZERO' from t1;
ZERO
0
Wait for Trx_2 has rotated binlog:
SET DEBUG_SYNC= "now WAIT_FOR con_flush_ready";
SET DEBUG_SYNC= "now SIGNAL default_go";
connection default;
Must be tree logs in the list:
show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000001 # Gtid_list # # []
master-bin.000001 # Binlog_checkpoint # # master-bin.000001
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Annotate_rows # # INSERT INTO t1 VALUES (1, REPEAT("x", 1))
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=POS
Only one Binlog checkpoint must exist and point to master-bin.000001
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000002 # Gtid_list # # [#-#-#]
master-bin.000002 # Binlog_checkpoint # # master-bin.000001
SELECT count(*) as 'ONE' from t1;
ONE
1
connection default;
DROP TABLE t1;
SET debug_sync = 'reset';
79 changes: 79 additions & 0 deletions mysql-test/suite/binlog/t/binlog_checkpoint_flush_logs.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/have_binlog_format_row.inc

# References:
#
# MDEV-24526 binlog rotate via FLUSH LOGS may obsolate binlog file too eary
#
# The test for MDEV-24526 proves the fixes correct observed race condition
# between a commiting transaction and FLUSH-LOGS.
# The plot.
# Trx_1 (con1) transaction binlogs first
# to yield its turn acquiring LOCK_commit_ordered to Trx_2 and stand
# still waiting of a signal that will never arrive.
# Trx_2 can't acquire it in the fixed version even though
# Trx_3 makes sure Trx_2 has reached a post-rotation execution point
# to signal it to proceed.
# Then the server gets crashed and Trx_1 must recover unlike
# in the OLD buggy version.
#
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;

CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;

--echo *** Test that FLUSH LOGS waits if a transaction ordered commit is in progress.

connect(con1,localhost,root,,); # Trx_1
# hang before doing acquiring Commit Ordered mutex
SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";

--send INSERT INTO t1 VALUES (1, REPEAT("x", 1))

connection default; # Trx_2

SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "rotate_after_rotate SIGNAL con_flush_ready WAIT_FOR default_go";
--send FLUSH LOGS

connect(con2,localhost,root,,); # Trx_3
--echo Trx_1 is not yet committed:
SELECT count(*) as 'ZERO' from t1;

--echo Wait for Trx_2 has rotated binlog:
SET DEBUG_SYNC= "now WAIT_FOR con_flush_ready";
# Useless signal to prove Trx_2 cannot race Trx_1's commit
# even though Trx_1 never received the being waited 'con1_go'.
SET DEBUG_SYNC= "now SIGNAL default_go";

--let $shutdown_timeout=0
--source include/restart_mysqld.inc

connection default;
--enable_reconnect
--error 0,2013
--reap

--echo Must be tree logs in the list:
--source include/show_binary_logs.inc
--let $binlog_file= master-bin.000001
--let $binlog_start= 4
--source include/show_binlog_events.inc

--echo Only one Binlog checkpoint must exist and point to master-bin.000001
--let $binlog_file= master-bin.000002
--let $binlog_start= 4
--source include/show_binlog_events.inc


# In the buggy server version the following select may have
# resulted with ZERO:
SELECT count(*) as 'ONE' from t1;

# Clean up.
connection default;

DROP TABLE t1;
SET debug_sync = 'reset';
19 changes: 19 additions & 0 deletions sql/log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6851,6 +6851,9 @@ int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
bool check_purge= false;

mysql_mutex_lock(&LOCK_log);

DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");

prev_binlog_id= current_binlog_id;

if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
Expand All @@ -6861,11 +6864,22 @@ int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
}
else if ((error= rotate(force_rotate, &check_purge)))
check_purge= false;

DEBUG_SYNC(current_thd, "rotate_after_rotate");

/*
NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
the mutex. Otherwise causes various deadlocks.
Explicit binlog rotation must be synchronized with a concurrent
binlog ordered commit, in particular not let binlog
checkpoint notification request until early binlogged
concurrent commits have has been completed.
*/
mysql_mutex_lock(&LOCK_after_binlog_sync);
mysql_mutex_unlock(&LOCK_log);
mysql_mutex_lock(&LOCK_commit_ordered);
mysql_mutex_unlock(&LOCK_after_binlog_sync);
mysql_mutex_unlock(&LOCK_commit_ordered);

if (check_purge)
checkpoint_and_purge(prev_binlog_id);
Expand Down Expand Up @@ -8014,7 +8028,12 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
}

DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");

mysql_mutex_lock(&LOCK_commit_ordered);
DBUG_EXECUTE_IF("crash_before_engine_commit",
{
DBUG_SUICIDE();
});
last_commit_pos_offset= commit_offset;

/*
Expand Down

0 comments on commit 2a7dd64

Please sign in to comment.