Skip to content

Commit 8a45128

Browse files
committed
Add MDEV-25999 & MDEV-36840 to MDEV-16091’s test
1. Stricten MDEV-16091’s `rpl.rpl_seconds_behind_master_spike` to prove that its fix covered MDEV-25999 as well 2. Add a Parallel Replication variant via a new `.combinations` file; its current failure confirm MDEV-36840. Because Parallel Replication queues Format Description Events as position update metaevents, this variant uses a separate set of API- compatible breakpoints to pause both the main and worker threads. To support them, this commit replaces the MDEV-33327 patch c75905c with a live-activation solution. In contrast, previous iterations queued the activation pre-test and relied on other factors to enable the break. Reviewed-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com>
1 parent 7a6da5b commit 8a45128

File tree

5 files changed

+59
-47
lines changed

5 files changed

+59
-47
lines changed

mysql-test/suite/rpl/r/rpl_seconds_behind_master_spike.result

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,18 @@ connection slave;
44
include/stop_slave.inc
55
CHANGE MASTER TO MASTER_USE_GTID=NO;
66
include/start_slave.inc
7-
include/stop_slave.inc
8-
SET @save_dbug= @@GLOBAL.debug_dbug;
9-
SET @@global.debug_dbug="+d,pause_sql_thread_on_relay_fde_after_trans";
10-
SET @@global.debug_dbug="+d,negate_clock_diff_with_master";
11-
include/start_slave.inc
127
# Future events must be logged at least 2 seconds after
138
# the slave starts
149
connection master;
1510
# Write events to ensure slave will be consistent with master
1611
create table t1 (a int);
1712
insert into t1 values (1);
13+
connection slave;
14+
# Enable breakpoints
15+
SET @save_dbug= @@GLOBAL.debug_dbug;
16+
SET @@GLOBAL.debug_dbug=
17+
'+d,pause_sql_thread_on_fde,negate_clock_diff_with_master';
18+
connection master;
1819
# Flush logs on master forces slave to generate a Format description
1920
# event in its relay log
2021
flush logs;
@@ -26,8 +27,8 @@ count(*)=1
2627
1
2728
# The relay log FDE has been processed - here we check to ensure it was
2829
# not considered in Seconds_Behind_Master calculation
29-
connection slave1;
30-
# Safely resume slave SQL thread
30+
Seconds_Behind_Master = '0'
31+
# Safely resume slave SQL (worker) thread
3132
# Prove SQL thread is in state "debug sync point: now"
3233
SET @@global.debug_dbug="-d,pause_sql_thread_on_fde";
3334
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[serial]
2+
--slave-parallel-threads=0
3+
4+
[parallel]
5+
--slave-parallel-threads=1

mysql-test/suite/rpl/t/rpl_seconds_behind_master_spike.test

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#
1919
# References:
2020
# MDEV-16091: Seconds_Behind_Master spikes to millions of seconds
21+
# MDEV-25999: Unrealistic "Seconds behind master" value
22+
# MDEV-36840: Seconds_Behind_Master Spike at Log Rotation on Parallel Replica
2123
#
2224
--source include/have_debug.inc
2325
--source include/have_debug_sync.inc
@@ -31,12 +33,6 @@
3133
CHANGE MASTER TO MASTER_USE_GTID=NO;
3234
--source include/start_slave.inc
3335

34-
--source include/stop_slave.inc
35-
SET @save_dbug= @@GLOBAL.debug_dbug;
36-
SET @@global.debug_dbug="+d,pause_sql_thread_on_relay_fde_after_trans";
37-
SET @@global.debug_dbug="+d,negate_clock_diff_with_master";
38-
--source include/start_slave.inc
39-
4036
--let $sleep_time=2
4137
--echo # Future events must be logged at least $sleep_time seconds after
4238
--echo # the slave starts
@@ -46,33 +42,38 @@ SET @@global.debug_dbug="+d,negate_clock_diff_with_master";
4642
--echo # Write events to ensure slave will be consistent with master
4743
create table t1 (a int);
4844
insert into t1 values (1);
49-
--let $t_master_events_logged= `SELECT UNIX_TIMESTAMP()`
45+
--sync_slave_with_master
46+
let $wait_condition= SELECT STATE LIKE '%waiting for more updates'
47+
FROM information_schema.PROCESSLIST WHERE COMMAND = 'Slave_SQL';
48+
--source include/wait_condition.inc
49+
50+
--echo # Enable breakpoints
51+
SET @save_dbug= @@GLOBAL.debug_dbug;
52+
SET @@GLOBAL.debug_dbug=
53+
'+d,pause_sql_thread_on_fde,negate_clock_diff_with_master';
5054

55+
--connection master
5156
--echo # Flush logs on master forces slave to generate a Format description
5257
--echo # event in its relay log
5358
flush logs;
5459

5560
--connection slave
5661
--echo # On the next FDE, the slave should have the master CREATE/INSERT events
62+
--sleep $sleep_time
5763
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
5864
select count(*)=1 from t1;
5965

6066
--echo # The relay log FDE has been processed - here we check to ensure it was
6167
--echo # not considered in Seconds_Behind_Master calculation
62-
--connection slave1
63-
let $sbm= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1);
64-
--let $t_now= `SELECT UNIX_TIMESTAMP()`
65-
66-
# Ensure Seconds_Behind_Master does not point beyond when we have proven the
67-
# events we have proven to have executed. The extra second is needed as a
68-
# buffer because the recorded times are not exact with when the events were
69-
# recorded on the master.
70-
if(`select $sbm > $t_now - $t_master_events_logged + 1`)
71-
{
72-
die "A relay log event was incorrectly used to set Seconds_Behind_Master";
73-
}
74-
75-
--echo # Safely resume slave SQL thread
68+
# `Seconds_Behind_Master` must remains 0.
69+
# * MDEV-16091: `Seconds_Behind_Master` should not point at the FDE's timestamp,
70+
# which is the time of `start_slave.inc`.
71+
# * MDEV-25999 & MDEV-36840: For a new event stream, `Seconds_Behind_Master`
72+
# should not point at anything at all, because FDEs are not user content.
73+
--let $status_items= Seconds_Behind_Master
74+
--source include/show_slave_status.inc
75+
76+
--echo # Safely resume slave SQL (worker) thread
7677

7778
--let $dbug_wait_state="debug sync point: now"
7879
--echo # Prove SQL thread is in state $dbug_wait_state

sql/rpl_parallel.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,17 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev)
104104
(e->force_abort && !rli->stop_for_until))
105105
return;
106106

107+
#ifdef ENABLED_DEBUG_SYNC
108+
DBUG_EXECUTE_IF("pause_sql_thread_on_fde",
109+
DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(
110+
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue"
111+
)));
112+
DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(
113+
"now SIGNAL main_sql_thread_continue"
114+
)));
115+
);
116+
#endif
117+
107118
mysql_mutex_lock(&rli->data_lock);
108119
cmp= compare_log_name(rli->group_relay_log_name, qev->event_relay_log_name);
109120
if (cmp < 0)

sql/slave.cc

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4481,6 +4481,12 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
44814481
rli->event_relay_log_pos= rli->future_event_relay_log_pos;
44824482
if (res >= 0)
44834483
{
4484+
DBUG_EXECUTE_IF("pause_sql_thread_on_fde",
4485+
if (typ == FORMAT_DESCRIPTION_EVENT)
4486+
DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(
4487+
"now WAIT_FOR main_sql_thread_continue"
4488+
)));
4489+
);
44844490
#ifdef WITH_WSREP
44854491
wsrep_after_statement(thd);
44864492
#endif /* WITH_WSREP */
@@ -4511,15 +4517,6 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
45114517
{
45124518
Gtid_log_event *gev= static_cast<Gtid_log_event *>(ev);
45134519

4514-
#ifdef ENABLED_DEBUG_SYNC
4515-
DBUG_EXECUTE_IF(
4516-
"pause_sql_thread_on_relay_fde_after_trans",
4517-
{
4518-
DBUG_SET("-d,pause_sql_thread_on_relay_fde_after_trans");
4519-
DBUG_SET("+d,pause_sql_thread_on_next_relay_fde");
4520-
});
4521-
#endif
4522-
45234520
/*
45244521
For GTID, allocate a new sub_id for the given domain_id.
45254522
The sub_id must be allocated in increasing order of binlog order.
@@ -4671,16 +4668,13 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
46714668
wsrep_after_statement(thd);
46724669
#endif /* WITH_WSREP */
46734670
#ifdef ENABLED_DEBUG_SYNC
4674-
DBUG_EXECUTE_IF(
4675-
"pause_sql_thread_on_next_relay_fde",
4676-
if (ev && typ == FORMAT_DESCRIPTION_EVENT &&
4677-
((Format_description_log_event *) ev)->is_relay_log_event()) {
4678-
DBUG_ASSERT(!debug_sync_set_action(
4679-
thd,
4680-
STRING_WITH_LEN(
4681-
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
4682-
DBUG_SET("-d,pause_sql_thread_on_next_relay_fde");
4683-
});
4671+
// Note: Parallel Replication does not hit this point.
4672+
DBUG_EXECUTE_IF("pause_sql_thread_on_fde",
4673+
if (typ == FORMAT_DESCRIPTION_EVENT)
4674+
DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(
4675+
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue"
4676+
)));
4677+
);
46844678
#endif
46854679

46864680
DBUG_RETURN(exec_res);

0 commit comments

Comments
 (0)