Skip to content

Commit 96de6bf

Browse files
committed
MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
Problem: ======== A slave’s relay log format description event is used when calculating Seconds_Behind_Master (SBM). This forces the SBM value to spike when processing these events, as their creation timestamp is set to the time at which the IO thread starts. Solution: ======== When the slave generates a format description event, mark the event as a relay log event so it does not update the rli->last_master_timestamp variable. Reviewed By: ============ Andrei Elkin <andrei.elkin@mariadb.com>
1 parent 452c9a4 commit 96de6bf

File tree

4 files changed

+137
-0
lines changed

4 files changed

+137
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
include/master-slave.inc
2+
[connection master]
3+
connection slave;
4+
include/stop_slave.inc
5+
SET @save_dbug= @@GLOBAL.debug_dbug;
6+
SET @@global.debug_dbug="+d,pause_sql_thread_on_fde";
7+
include/start_slave.inc
8+
# Future events must be logged at least 2 seconds after
9+
# the slave starts
10+
connection master;
11+
# Write events to ensure slave will be consistent with master
12+
create table t1 (a int);
13+
insert into t1 values (1);
14+
# Flush logs on master forces slave to generate a Format description
15+
# event in its relay log
16+
flush logs;
17+
connection slave;
18+
# Ignore FDEs that happen before the CREATE/INSERT commands
19+
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
20+
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
21+
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
22+
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
23+
# On the next FDE, the slave should have the master CREATE/INSERT events
24+
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
25+
select count(*)=1 from t1;
26+
count(*)=1
27+
1
28+
# The relay log FDE has been processed - here we check to ensure it was
29+
# not considered in Seconds_Behind_Master calculation
30+
connection slave1;
31+
# Safely resume slave SQL thread
32+
SET @@global.debug_dbug='';
33+
SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
34+
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
35+
SET DEBUG_SYNC='RESET';
36+
connection master;
37+
DROP TABLE t1;
38+
connection slave;
39+
connection slave;
40+
SET @@global.debug_dbug=$save_dbug;
41+
include/rpl_end.inc
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#
2+
# Purpose:
3+
# This test validates that a slave's relay log format description event is
4+
# not used to calculate the Seconds_Behind_Master time displayed by
5+
# SHOW SLAVE STATUS.
6+
#
7+
# Methodology:
8+
# Ensure that a slave's reported Seconds_Behind_Master never points to a time
9+
# earlier than one the slave has provably progressed beyond. The slave's
10+
# relay log events are created using the timestamp at which the IO thread was
11+
# created. Therefore, after starting the slave's IO thread, we sleep so any
12+
# subsequent events are forced to have later timestamps. After sleeping, we run
13+
# DDL/DML statements on the master and save the time at which they are binlogged.
14+
# Once the slave executes these statements, we have proven that the slave has
15+
# caught up to this saved timestamp. At this point, if the value of
16+
# Seconds_Behind_Master points before the time at which these events were
17+
# logged, it is invalid.
18+
#
19+
# References:
20+
# MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
21+
#
22+
--source include/have_debug.inc
23+
--source include/have_innodb.inc
24+
--source include/master-slave.inc
25+
26+
--connection slave
27+
--source include/stop_slave.inc
28+
SET @save_dbug= @@GLOBAL.debug_dbug;
29+
SET @@global.debug_dbug="+d,pause_sql_thread_on_fde";
30+
--source include/start_slave.inc
31+
32+
--let $sleep_time=2
33+
--echo # Future events must be logged at least $sleep_time seconds after
34+
--echo # the slave starts
35+
--sleep $sleep_time
36+
37+
--connection master
38+
--echo # Write events to ensure slave will be consistent with master
39+
create table t1 (a int);
40+
insert into t1 values (1);
41+
--let $t_master_events_logged= `SELECT UNIX_TIMESTAMP()`
42+
43+
--echo # Flush logs on master forces slave to generate a Format description
44+
--echo # event in its relay log
45+
flush logs;
46+
47+
--connection slave
48+
--echo # Ignore FDEs that happen before the CREATE/INSERT commands
49+
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
50+
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
51+
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
52+
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
53+
54+
--echo # On the next FDE, the slave should have the master CREATE/INSERT events
55+
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
56+
select count(*)=1 from t1;
57+
58+
--echo # The relay log FDE has been processed - here we check to ensure it was
59+
--echo # not considered in Seconds_Behind_Master calculation
60+
--connection slave1
61+
# Read the slave's reported lag and the current time, then fail the test if the
# reported lag is larger than the number of seconds elapsed since the master
# events were logged ($t_master_events_logged, captured on the master right
# after the INSERT). A larger value would mean a timestamp older than those
# events was used to compute Seconds_Behind_Master.
let $sbm= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1);
62+
--let $t_now= `SELECT UNIX_TIMESTAMP()`
63+
64+
# NOTE(review): if Seconds_Behind_Master is reported as NULL the comparison
# presumably evaluates to NULL and the check is silently skipped -- confirm.
if(`select $sbm > $t_now - $t_master_events_logged`)
65+
{
66+
die "A relay log event was incorrectly used to set Seconds_Behind_Master";
67+
}
68+
69+
--echo # Safely resume slave SQL thread
70+
SET @@global.debug_dbug='';
71+
SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
72+
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
73+
74+
# Reset last sql_thread_continue signal
75+
SET DEBUG_SYNC='RESET';
76+
77+
# Cleanup
78+
--connection master
79+
DROP TABLE t1;
80+
--save_master_pos
81+
--sync_slave_with_master
82+
83+
--connection slave
84+
# NOTE(review): the original setting was saved earlier in the SQL user variable
# @save_dbug ("SET @save_dbug= @@GLOBAL.debug_dbug;"); no mysqltest variable
# named $save_dbug is assigned anywhere in this script, so this statement
# probably should restore from @save_dbug instead -- confirm, and re-record the
# .result file together with any change since this statement is echoed there.
SET @@global.debug_dbug=$save_dbug;
85+
86+
--source include/rpl_end.inc

sql/log.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3463,6 +3463,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
34633463
opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
34643464
: BINLOG_CHECKSUM_ALG_OFF;
34653465
s.checksum_alg= relay_log_checksum_alg;
3466+
s.set_relay_log_event();
34663467
}
34673468
else
34683469
s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;

sql/slave.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4033,6 +4033,15 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
40334033
#endif /* WITH_WSREP */
40344034

40354035
thread_safe_increment64(&rli->executed_entries);
4036+
DBUG_EXECUTE_IF(
4037+
"pause_sql_thread_on_fde",
4038+
if (ev && typ == FORMAT_DESCRIPTION_EVENT) {
4039+
DBUG_ASSERT(!debug_sync_set_action(
4040+
thd,
4041+
STRING_WITH_LEN(
4042+
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
4043+
});
4044+
40364045
DBUG_RETURN(exec_res);
40374046
}
40384047
mysql_mutex_unlock(&rli->data_lock);

0 commit comments

Comments
 (0)