Skip to content

Commit d69e835

Browse files
committed
MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel Replicas
Problem ======== On a parallel, delayed replica, Seconds_Behind_Master will not be calculated until after MASTER_DELAY seconds have passed and the event has finished executing, resulting in potentially very large values of Seconds_Behind_Master (which could be much larger than the MASTER_DELAY parameter) for the entire duration the event is delayed. This contradicts the documented MASTER_DELAY behavior, which specifies how many seconds to withhold replicated events from execution. Solution ======== After a parallel replica idles, the first event after idling should immediately update last_master_timestamp with the time that it began execution on the primary. Reviewed By =========== Andrei Elkin <andrei.elkin@mariadb.com>
1 parent 2ed598e commit d69e835

File tree

6 files changed

+231
-9
lines changed

6 files changed

+231
-9
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
include/master-slave.inc
2+
[connection master]
3+
connection slave;
4+
include/stop_slave.inc
5+
change master to master_delay=3, master_use_gtid=Slave_Pos;
6+
set @@GLOBAL.slave_parallel_threads=2;
7+
include/start_slave.inc
8+
connection master;
9+
create table t1 (a int);
10+
include/sync_slave_sql_with_master.inc
11+
#
12+
# Pt 1) Ensure SBM is updated immediately upon arrival of the next event
13+
# Lock t1 on slave so the first received transaction does not complete/commit
14+
connection slave;
15+
LOCK TABLES t1 WRITE;
16+
connection master;
17+
# Sleep 2 to allow a buffer between events for SBM check
18+
insert into t1 values (0);
19+
include/save_master_gtid.inc
20+
connection slave;
21+
# Waiting for transaction to arrive on slave and begin SQL Delay..
22+
# Validating SBM is updated on event arrival..
23+
# ..done
24+
connection slave;
25+
UNLOCK TABLES;
26+
include/sync_with_master_gtid.inc
27+
#
28+
# Pt 2) If the SQL thread has not entered an idle state, ensure
29+
# following events do not update SBM
30+
# Stop slave IO thread so it receives both events together on restart
31+
connection slave;
32+
include/stop_slave_io.inc
33+
connection master;
34+
# Sleep 2 to allow a buffer between events for SBM check
35+
insert into t1 values (1);
36+
# Sleep 3 to create gap between events
37+
insert into t1 values (2);
38+
connection slave;
39+
LOCK TABLES t1 WRITE;
40+
START SLAVE IO_THREAD;
41+
# Wait for first transaction to complete SQL delay and begin execution..
42+
# Validate SBM calculation doesn't use the second transaction because SQL thread shouldn't have gone idle..
43+
# ..and that SBM wasn't calculated using prior committed transactions
44+
# ..done
45+
connection slave;
46+
UNLOCK TABLES;
47+
#
48+
# Cleanup
49+
# Reset master_delay
50+
include/stop_slave.inc
51+
CHANGE MASTER TO master_delay=0;
52+
set @@GLOBAL.slave_parallel_threads=4;
53+
include/start_slave.inc
54+
connection master;
55+
DROP TABLE t1;
56+
include/save_master_gtid.inc
57+
connection slave;
58+
include/sync_with_master_gtid.inc
59+
include/rpl_end.inc
60+
# End of rpl_delayed_parallel_slave_sbm.test
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--slave-parallel-threads=4
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#
2+
# This test ensures that after a delayed parallel slave has idled, i.e.
3+
# executed everything in its relay log, the next event group that the SQL
4+
# thread reads from the relay log will immediately be used in the
5+
# Seconds_Behind_Master. In particular, it ensures that the calculation for
6+
# Seconds_Behind_Master is based on the timestamp of the new transaction,
7+
# rather than the last committed transaction.
8+
#
9+
# References:
10+
# MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel
11+
# Replicas
12+
#
13+
14+
--source include/master-slave.inc
15+
16+
--connection slave
17+
--source include/stop_slave.inc
18+
--let $master_delay= 3
19+
--eval change master to master_delay=$master_delay, master_use_gtid=Slave_Pos
20+
--let $old_slave_threads= `SELECT @@GLOBAL.slave_parallel_threads`
21+
set @@GLOBAL.slave_parallel_threads=2;
22+
--source include/start_slave.inc
23+
24+
--connection master
25+
create table t1 (a int);
26+
--source include/sync_slave_sql_with_master.inc
27+
28+
--echo #
29+
--echo # Pt 1) Ensure SBM is updated immediately upon arrival of the next event
30+
31+
--echo # Lock t1 on slave so the first received transaction does not complete/commit
32+
--connection slave
33+
LOCK TABLES t1 WRITE;
34+
35+
--connection master
36+
--echo # Sleep 2 to allow a buffer between events for SBM check
37+
sleep 2;
38+
39+
--let $ts_trx_before_ins= `SELECT UNIX_TIMESTAMP()`
40+
--let insert_ctr= 0
41+
--eval insert into t1 values ($insert_ctr)
42+
--inc $insert_ctr
43+
--source include/save_master_gtid.inc
44+
45+
--connection slave
46+
47+
--echo # Waiting for transaction to arrive on slave and begin SQL Delay..
48+
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting until MASTER_DELAY seconds after master executed event';
49+
--source include/wait_condition.inc
50+
51+
--echo # Validating SBM is updated on event arrival..
52+
--let $sbm_trx1_arrive= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
53+
--let $seconds_since_idling= `SELECT UNIX_TIMESTAMP() - $ts_trx_before_ins`
54+
if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`)
55+
{
56+
--echo # SBM was $sbm_trx1_arrive yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master)
57+
--die Seconds_Behind_Master should reset after idling
58+
}
59+
--echo # ..done
60+
61+
--connection slave
62+
UNLOCK TABLES;
63+
--source include/sync_with_master_gtid.inc
64+
65+
--echo #
66+
--echo # Pt 2) If the SQL thread has not entered an idle state, ensure
67+
--echo # following events do not update SBM
68+
69+
--echo # Stop slave IO thread so it receives both events together on restart
70+
--connection slave
71+
--source include/stop_slave_io.inc
72+
73+
--connection master
74+
75+
--echo # Sleep 2 to allow a buffer between events for SBM check
76+
sleep 2;
77+
--let $ts_trxpt2_before_ins= `SELECT UNIX_TIMESTAMP()`
78+
--eval insert into t1 values ($insert_ctr)
79+
--inc $insert_ctr
80+
--echo # Sleep 3 to create gap between events
81+
sleep 3;
82+
--eval insert into t1 values ($insert_ctr)
83+
--inc $insert_ctr
84+
--let $ts_trx_after_ins= `SELECT UNIX_TIMESTAMP()`
85+
86+
--connection slave
87+
LOCK TABLES t1 WRITE;
88+
89+
START SLAVE IO_THREAD;
90+
91+
--echo # Wait for first transaction to complete SQL delay and begin execution..
92+
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock%' AND command LIKE 'Slave_Worker';
93+
--source include/wait_condition.inc
94+
95+
--echo # Validate SBM calculation doesn't use the second transaction because SQL thread shouldn't have gone idle..
96+
--let $sbm_after_trx_no_idle= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
97+
--let $timestamp_trxpt2_arrive= `SELECT UNIX_TIMESTAMP()`
98+
if (`SELECT $sbm_after_trx_no_idle < $timestamp_trxpt2_arrive - $ts_trx_after_ins`)
99+
{
100+
--let $cmpv= `SELECT $timestamp_trxpt2_arrive - $ts_trx_after_ins`
101+
--echo # SBM $sbm_after_trx_no_idle was more recent than time since last transaction ($cmpv seconds)
102+
--die Seconds_Behind_Master should not have used second transaction timestamp
103+
}
104+
--let $seconds_since_idling= `SELECT ($timestamp_trxpt2_arrive - $ts_trxpt2_before_ins)`
105+
--echo # ..and that SBM wasn't calculated using prior committed transactions
106+
if (`SELECT $sbm_after_trx_no_idle > ($seconds_since_idling + 1)`)
107+
{
108+
--echo # SBM was $sbm_after_trx_no_idle yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master)
109+
--die Seconds_Behind_Master calculation should not have used prior committed transaction
110+
}
111+
--echo # ..done
112+
113+
--connection slave
114+
UNLOCK TABLES;
115+
116+
--echo #
117+
--echo # Cleanup
118+
119+
--echo # Reset master_delay
120+
--source include/stop_slave.inc
121+
--eval CHANGE MASTER TO master_delay=0
122+
--eval set @@GLOBAL.slave_parallel_threads=$old_slave_threads
123+
--source include/start_slave.inc
124+
125+
--connection master
126+
DROP TABLE t1;
127+
--source include/save_master_gtid.inc
128+
129+
--connection slave
130+
--source include/sync_with_master_gtid.inc
131+
132+
--source include/rpl_end.inc
133+
--echo # End of rpl_delayed_parallel_slave_sbm.test

sql/rpl_parallel.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev,
4545
rgi->event_relay_log_pos= qev->event_relay_log_pos;
4646
rgi->future_event_relay_log_pos= qev->future_event_relay_log_pos;
4747
strcpy(rgi->future_event_master_log_name, qev->future_event_master_log_name);
48-
if (!(ev->is_artificial_event() || ev->is_relay_log_event() ||
49-
(ev->when == 0)))
48+
if (event_can_update_last_master_timestamp(ev))
5049
rgi->last_master_timestamp= ev->when + (time_t)ev->exec_time;
5150
err= apply_event_and_update_pos_for_parallel(ev, thd, rgi);
5251

sql/slave.cc

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4125,10 +4125,10 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
41254125
the user might be surprised to see a claim that the slave is up to date
41264126
long before those queued events are actually executed.
41274127
*/
4128-
if (!rli->mi->using_parallel() &&
4129-
!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
4128+
if ((!rli->mi->using_parallel()) && event_can_update_last_master_timestamp(ev))
41304129
{
41314130
rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
4131+
rli->sql_thread_caught_up= false;
41324132
DBUG_ASSERT(rli->last_master_timestamp >= 0);
41334133
}
41344134

@@ -4177,6 +4177,17 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
41774177

41784178
if (rli->mi->using_parallel())
41794179
{
4180+
if (unlikely((rli->last_master_timestamp == 0 ||
4181+
rli->sql_thread_caught_up) &&
4182+
event_can_update_last_master_timestamp(ev)))
4183+
{
4184+
if (rli->last_master_timestamp < ev->when)
4185+
{
4186+
rli->last_master_timestamp= ev->when;
4187+
rli->sql_thread_caught_up= false;
4188+
}
4189+
}
4190+
41804191
int res= rli->parallel.do_event(serial_rgi, ev, event_size);
41814192
/*
41824193
In parallel replication, we need to update the relay log position
@@ -4192,15 +4203,16 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
41924203
This is the case for pre-10.0 events without GTID, and for handling
41934204
slave_skip_counter.
41944205
*/
4195-
if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
4206+
if (event_can_update_last_master_timestamp(ev))
41964207
{
41974208
/*
41984209
Ignore FD's timestamp as it does not reflect the slave execution
41994210
state but likely to reflect a deep past. Consequently when the first
42004211
data modification event execution last long all this time
42014212
Seconds_Behind_Master is zero.
42024213
*/
4203-
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
4214+
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
4215+
rli->last_master_timestamp < ev->when)
42044216
rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
42054217

42064218
DBUG_ASSERT(rli->last_master_timestamp >= 0);
@@ -7544,7 +7556,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
75447556

75457557
if (hot_log)
75467558
mysql_mutex_unlock(log_lock);
7547-
rli->sql_thread_caught_up= false;
75487559
DBUG_RETURN(ev);
75497560
}
75507561
if (opt_reckless_slave) // For mysql-test
@@ -7711,7 +7722,6 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
77117722
rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd);
77127723
// re-acquire data lock since we released it earlier
77137724
mysql_mutex_lock(&rli->data_lock);
7714-
rli->sql_thread_caught_up= false;
77157725
continue;
77167726
}
77177727
/*
@@ -7902,12 +7912,19 @@ event(errno: %d cur_log->error: %d)",
79027912
{
79037913
sql_print_information("Error reading relay log event: %s",
79047914
"slave SQL thread was killed");
7905-
DBUG_RETURN(0);
7915+
goto end;
79067916
}
79077917

79087918
err:
79097919
if (errmsg)
79107920
sql_print_error("Error reading relay log event: %s", errmsg);
7921+
7922+
end:
7923+
/*
7924+
Set that we are not caught up so if there is a hang/problem on restart,
7925+
Seconds_Behind_Master will still grow.
7926+
*/
7927+
rli->sql_thread_caught_up= false;
79117928
DBUG_RETURN(0);
79127929
}
79137930
#ifdef WITH_WSREP

sql/slave.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "rpl_filter.h"
5050
#include "rpl_tblmap.h"
5151
#include "rpl_gtid.h"
52+
#include "log_event.h"
5253

5354
#define SLAVE_NET_TIMEOUT 60
5455

@@ -293,6 +294,17 @@ extern char *report_host, *report_password;
293294

294295
extern I_List<THD> threads;
295296

297+
/*
298+
Check that a binlog event (read from the relay log) is valid to update
299+
last_master_timestamp. That is, a valid event is one with a consistent
300+
timestamp which originated from a primary server.
301+
*/
302+
static inline bool event_can_update_last_master_timestamp(Log_event *ev)
303+
{
304+
return ev && !(ev->is_artificial_event() || ev->is_relay_log_event() ||
305+
(ev->when == 0));
306+
}
307+
296308
#else
297309
#define close_active_mi() /* no-op */
298310
#endif /* HAVE_REPLICATION */

0 commit comments

Comments
 (0)