Skip to content

Commit b70cd26

Browse files
committed
MDEV-11668 rpl.rpl_heartbeat_basic fails sporadically in buildbot
On a slow builder, a delay between binlog events on master could occur, which would cause a heartbeat which is not expected by the test. The solution is to monitor the timing of binlog events on the master and only perform the heartbeat check if no critical delays have happened. Additionally, an unused variable was removed (this change is unrelated to the bugfix).
1 parent 29d78db commit b70cd26

File tree

2 files changed

+40
-9
lines changed

2 files changed

+40
-9
lines changed

mysql-test/suite/rpl/r/rpl_heartbeat_basic.result

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ RESET SLAVE;
77
SET @restore_slave_net_timeout=@@global.slave_net_timeout;
88
RESET MASTER;
99
SET @restore_slave_net_timeout=@@global.slave_net_timeout;
10-
SET @restore_event_scheduler=@@global.event_scheduler;
1110

1211
*** Default value ***
1312
CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_PORT=MASTER_PORT, MASTER_USER='root';
@@ -223,7 +222,7 @@ RESET SLAVE;
223222
CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_PORT=MASTER_PORT, MASTER_USER='root', MASTER_CONNECT_RETRY=20, MASTER_HEARTBEAT_PERIOD=5;
224223
include/start_slave.inc
225224
SET @@global.event_scheduler=1;
226-
Number of received heartbeat events: 0
225+
Received heartbeats meet expectations: TRUE
227226
DELETE FROM t1;
228227
DROP EVENT e1;
229228

mysql-test/suite/rpl/t/rpl_heartbeat_basic.test

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ eval SET @restore_slave_heartbeat_timeout=$slave_heartbeat_timeout;
3434
--connection master
3535
RESET MASTER;
3636
SET @restore_slave_net_timeout=@@global.slave_net_timeout;
37-
SET @restore_event_scheduler=@@global.event_scheduler;
3837
--echo
3938

4039
#
@@ -352,21 +351,54 @@ eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_PORT=$MASTER_MYPORT, MASTE
352351
--connection master
353352
# Enable scheduler
354353
SET @@global.event_scheduler=1;
354+
355355
--sync_slave_with_master
356356
let $rcvd_heartbeats_before= query_get_value(SHOW STATUS LIKE 'slave_received_heartbeats', Value, 1);
357-
# Wait some updates for table t1 from master
358-
let $wait_condition= SELECT COUNT(*)=1 FROM t1 WHERE a > 5;
359-
--source include/wait_condition.inc
357+
358+
--connection master
359+
360+
# Whether or not to send a heartbeat is decided on the master, based on
361+
# whether the binlog was updated during the period or not.
362+
# Even with the 1-second event, we cannot make the master to write binary
363+
# logs (or execute SQL) in a timely manner. We can only check that they
364+
# were executed in a timely manner, and if they were not, neutralize the
365+
# heartbeat check on the slave.
366+
# We will wait for 5 events, and keep checking 'Binlog_commits' on master.
367+
# Time interval between consequent events will be measured.
368+
# We can only expect that no heartbeats have been sent if the interval
369+
# between events never exceeded MASTER_HEARTBEAT_PERIOD.
370+
# If it has exceeded the value at least once, the slave can legitimately
371+
# receive a heartbeat (but we cannot require it, because the delay
372+
# could have occurred somewhere else, e.g. upon checking the status).
373+
# So, if the delay is detected, we will signal slave to ignore possible
374+
# heartbeats.
375+
376+
let $possible_heartbeats= 0;
377+
let $commits_to_wait= 5;
378+
while ($commits_to_wait)
379+
{
380+
let $tm= `SELECT UNIX_TIMESTAMP(NOW(3))`;
381+
let $binlog_commits= query_get_value(SHOW STATUS LIKE 'Binlog_commits', Value, 1);
382+
let $wait_condition= SELECT VARIABLE_VALUE > $binlog_commits FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME= 'BINLOG_COMMITS';
383+
--source include/wait_condition.inc
384+
dec $commits_to_wait;
385+
if (`SELECT UNIX_TIMESTAMP(NOW(3)) > $tm + 5`)
386+
{
387+
let $possible_heartbeats= 1;
388+
let $commits_to_wait= 0;
389+
}
390+
}
391+
392+
--connection slave
360393
let $rcvd_heartbeats_after= query_get_value(SHOW STATUS LIKE 'slave_received_heartbeats', Value, 1);
361-
let $result= query_get_value(SELECT ($rcvd_heartbeats_after - $rcvd_heartbeats_before) > 0 AS Result, Result, 1);
362-
--echo Number of received heartbeat events: $result
394+
let $result= `SELECT CASE WHEN $possible_heartbeats THEN 'TRUE' WHEN $rcvd_heartbeats_after - $rcvd_heartbeats_before > 0 THEN 'FALSE' ELSE 'TRUE' END`;
395+
--echo Received heartbeats meet expectations: $result
363396
--connection master
364397
DELETE FROM t1;
365398
DROP EVENT e1;
366399
--sync_slave_with_master
367400
--echo
368401

369-
370402
# Check received heartbeat events while logs flushed on slave
371403
--echo *** Flush logs on slave ***
372404
STOP SLAVE;

0 commit comments

Comments
 (0)