Skip to content

Commit 35c732c

Browse files
committed
MDEV-25611: RESET MASTER causes the server to hang
RESET MASTER waits for storage engines to reply to a binlog checkpoint requests. If this response is delayed for a long time for some reason, then RESET MASTER can hang. Fix this by forcing a log sync in all engines just before waiting for the checkpoint reply. (Waiting for old checkpoint responses is needed to preserve durability of any commits that were synced to disk in the to-be-deleted binlog but not yet synced in the engine.) Reviewed-by: Andrei Elkin <andrei.elkin@mariadb.com> Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
1 parent 7ea9e13 commit 35c732c

File tree

4 files changed

+88
-4
lines changed

4 files changed

+88
-4
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
*** MDEV-25611: RESET MASTER hangs waiting for InnoDB to flush its log
2+
SET @old_dbug= @@global.DEBUG_DBUG;
3+
SET GLOBAL debug_dbug="+d,ib_log_checkpoint_avoid";
4+
SET @old_flush= @@GLOBAL.innodb_flush_log_at_trx_commit;
5+
SET GLOBAL innodb_flush_log_at_trx_commit= 0;
6+
SET @old_flush_timeout= @@GLOBAL.innoDB_flush_log_at_timeout;
7+
SET GLOBAL innoDB_flush_log_at_timeout=2700;
8+
connection default;
9+
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB STATS_PERSISTENT=0;
10+
INSERT INTO t1 VALUES (1,1);
11+
INSERT INTO t1 VALUES (2,1);
12+
INSERT INTO t1 VALUES (3,1);
13+
connect stop_purge,localhost,root,,;
14+
START TRANSACTION WITH CONSISTENT SNAPSHOT;
15+
connection default;
16+
INSERT INTO t1 VALUES (4,2);
17+
DELETE FROM t1 WHERE a in (1,2,3);
18+
FLUSH BINARY LOGS;
19+
connection stop_purge;
20+
ROLLBACK;
21+
connection default;
22+
disconnect stop_purge;
23+
RESET MASTER;
24+
connection default;
25+
DROP TABLE t1;
26+
SET GLOBAL debug_dbug= @old_dbug;
27+
SET GLOBAL innodb_flush_log_at_trx_commit= @old_flush;
28+
SET GLOBAL innoDB_flush_log_at_timeout= @old_flush_timeout;
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
source include/have_innodb.inc;
2+
source include/have_log_bin.inc;
3+
source include/have_binlog_format_mixed.inc;
4+
5+
--echo *** MDEV-25611: RESET MASTER hangs waiting for InnoDB to flush its log
6+
# Error injection to minimize extra log flushing inside innodb.
7+
SET @old_dbug= @@global.DEBUG_DBUG;
8+
SET GLOBAL debug_dbug="+d,ib_log_checkpoint_avoid";
9+
# Couple other settings that reduce redo log flushing, thus potentially
10+
# triggering the hang.
11+
SET @old_flush= @@GLOBAL.innodb_flush_log_at_trx_commit;
12+
SET GLOBAL innodb_flush_log_at_trx_commit= 0;
13+
SET @old_flush_timeout= @@GLOBAL.innoDB_flush_log_at_timeout;
14+
SET GLOBAL innoDB_flush_log_at_timeout=2700;
15+
16+
--connection default
17+
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB STATS_PERSISTENT=0;
18+
INSERT INTO t1 VALUES (1,1);
19+
INSERT INTO t1 VALUES (2,1);
20+
INSERT INTO t1 VALUES (3,1);
21+
22+
connect (stop_purge,localhost,root,,);
23+
# This blocks purge due to old data being still visible.
24+
START TRANSACTION WITH CONSISTENT SNAPSHOT;
25+
26+
--connection default
27+
INSERT INTO t1 VALUES (4,2);
28+
DELETE FROM t1 WHERE a in (1,2,3);
29+
30+
# Rotate the binlog and wait for everything to settle down and latest binlog
31+
# checkpoint to be done. The small sleep helps trigger the bug more reliably
32+
# before the fix.
33+
FLUSH BINARY LOGS;
34+
--sleep 0.1
35+
36+
# Now unblock the purge, and wait for some purge records to be written
37+
# to the redo log so the LSN is incremented but will not be synced to
38+
# disk until something else happens.
39+
--connection stop_purge
40+
ROLLBACK;
41+
--connection default
42+
--disconnect stop_purge
43+
44+
--sleep 0.1
45+
46+
# Now see if RESET MASTER will request and wait for a binlog checkpoint that is never reported.
47+
RESET MASTER;
48+
49+
# Clean up.
50+
--connection default
51+
DROP TABLE t1;
52+
SET GLOBAL debug_dbug= @old_dbug;
53+
SET GLOBAL innodb_flush_log_at_trx_commit= @old_flush;
54+
SET GLOBAL innoDB_flush_log_at_timeout= @old_flush_timeout;

sql/log.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4435,6 +4435,9 @@ bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
44354435
mark_xids_active(current_binlog_id, 1);
44364436
do_checkpoint_request(current_binlog_id);
44374437

4438+
/* Flush all engine logs to force checkpoint responses to come through. */
4439+
ha_flush_logs();
4440+
44384441
/* Now wait for all checkpoint requests and pending unlog() to complete. */
44394442
mysql_mutex_lock(&LOCK_xid_list);
44404443
for (;;)

storage/innobase/handler/ha_innodb.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,10 +1583,9 @@ innobase_start_trx_and_assign_read_view(
15831583
@return false */
15841584
static bool innobase_flush_logs(handlerton*)
15851585
{
1586-
if (!srv_read_only_mode && srv_flush_log_at_trx_commit)
1587-
/* Write any outstanding redo log. Durably if
1588-
innodb_flush_log_at_trx_commit=1. */
1589-
log_buffer_flush_to_disk(srv_flush_log_at_trx_commit == 1);
1586+
if (!srv_read_only_mode)
1587+
/* Write and flush any outstanding redo log. */
1588+
log_buffer_flush_to_disk(true);
15901589
return false;
15911590
}
15921591

0 commit comments

Comments
 (0)