Skip to content

Commit

Permalink
MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
Browse files Browse the repository at this point in the history
Problem:
========
A slave’s relay log format description event is used when
calculating Seconds_Behind_Master (SBM). This forces the SBM
value to spike when processing these events, as their creation
date is set to the timestamp that the IO thread begins.

Solution:
========
When the slave generates a format description event, mark the
event as a relay log event so it does not update the
rli->last_master_timestamp variable.

Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
  • Loading branch information
bnestere committed Jan 4, 2022
1 parent 452c9a4 commit 96de6bf
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 0 deletions.
41 changes: 41 additions & 0 deletions mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
include/master-slave.inc
[connection master]
connection slave;
include/stop_slave.inc
SET @save_dbug= @@GLOBAL.debug_dbug;
SET @@global.debug_dbug="+d,pause_sql_thread_on_fde";
include/start_slave.inc
# Future events must be logged at least 2 seconds after
# the slave starts
connection master;
# Write events to ensure slave will be consistent with master
create table t1 (a int);
insert into t1 values (1);
# Flush logs on master forces slave to generate a Format description
# event in its relay log
flush logs;
connection slave;
# Ignore FDEs that happen before the CREATE/INSERT commands
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
# On the next FDE, the slave should have the master CREATE/INSERT events
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
select count(*)=1 from t1;
count(*)=1
1
# The relay log FDE has been processed - here we check to ensure it was
# not considered in Seconds_Behind_Master calculation
connection slave1;
# Safely resume slave SQL thread
SET @@global.debug_dbug='';
SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
SET DEBUG_SYNC='RESET';
connection master;
DROP TABLE t1;
connection slave;
connection slave;
SET @@global.debug_dbug=$save_dbug;
include/rpl_end.inc
86 changes: 86 additions & 0 deletions mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#
# Purpose:
# This test validates that a slave's relay log format description event is
# not used to calculate the Seconds_Behind_Master time displayed by
# SHOW SLAVE STATUS.
#
# Methodology:
# Ensure that a slave's reported Seconds_Behind_Master does not point before
# a time in which we can prove that it has progressed beyond. The slave's
# relay log events are created using the timestamp at which the IO thread was
# created. Therefore, after starting the slave's IO thread, we sleep so any
# proceeding events are forced to have later timestamps. After sleeping, we run
# MDL statements on the master and save the time at which they are binlogged.
# Once the slave executes these MDL commands, we have proven that the slave has
# caught up to this saved timestamp. At this point, if the value of
# Seconds_Behind_Master points before the time in which the MDL events were
# logged, it is invalid.
#
# References:
# MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
#
--source include/have_debug.inc
--source include/have_innodb.inc
--source include/master-slave.inc

--connection slave
--source include/stop_slave.inc
SET @save_dbug= @@GLOBAL.debug_dbug;
SET @@global.debug_dbug="+d,pause_sql_thread_on_fde";
--source include/start_slave.inc

--let $sleep_time=2
--echo # Future events must be logged at least $sleep_time seconds after
--echo # the slave starts
--sleep $sleep_time

--connection master
--echo # Write events to ensure slave will be consistent with master
create table t1 (a int);
insert into t1 values (1);
--let $t_master_events_logged= `SELECT UNIX_TIMESTAMP()`

--echo # Flush logs on master forces slave to generate a Format description
--echo # event in its relay log
flush logs;

--connection slave
--echo # Ignore FDEs that happen before the CREATE/INSERT commands
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';

--echo # On the next FDE, the slave should have the master CREATE/INSERT events
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
select count(*)=1 from t1;

--echo # The relay log FDE has been processed - here we check to ensure it was
--echo # not considered in Seconds_Behind_Master calculation
--connection slave1
let $sbm= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1);
--let $t_now= `SELECT UNIX_TIMESTAMP()`

if(`select $sbm > $t_now - $t_master_events_logged`)
{
die "A relay log event was incorrectly used to set Seconds_Behind_Master";
}

--echo # Safely resume slave SQL thread
SET @@global.debug_dbug='';
SET DEBUG_SYNC='pause_sql_thread_on_fde CLEAR';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';

# Reset last sql_thread_continue signal
SET DEBUG_SYNC='RESET';

# Cleanup
--connection master
DROP TABLE t1;
--save_master_pos
--sync_slave_with_master

--connection slave
SET @@global.debug_dbug=$save_dbug;

--source include/rpl_end.inc
1 change: 1 addition & 0 deletions sql/log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3463,6 +3463,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
: BINLOG_CHECKSUM_ALG_OFF;
s.checksum_alg= relay_log_checksum_alg;
s.set_relay_log_event();
}
else
s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;
Expand Down
9 changes: 9 additions & 0 deletions sql/slave.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4033,6 +4033,15 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
#endif /* WITH_WSREP */

thread_safe_increment64(&rli->executed_entries);
DBUG_EXECUTE_IF(
"pause_sql_thread_on_fde",
if (ev && typ == FORMAT_DESCRIPTION_EVENT) {
DBUG_ASSERT(!debug_sync_set_action(
thd,
STRING_WITH_LEN(
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
});

DBUG_RETURN(exec_res);
}
mysql_mutex_unlock(&rli->data_lock);
Expand Down

0 comments on commit 96de6bf

Please sign in to comment.