Skip to content

Commit f7ba169

Browse files
committed
MDEV-36840 Seconds_Behind_Master Spike at Log Rotation on Parallel Replica
`Seconds_Behind_Master` unsets from 0 when a parallel replica processes an internal event. Est. 8dad514 of MDEV-10653, the idle status of the worker threads directly checks the emptiness of the workers’ queue. The problem is that the queue could be entirely internal events that don’t record replicated content. In contrast, the (main) SQL thread (in both serial and parallel replicas) uses a state boolean `sql_thread_caught_up` that is not unset if the event is internal. Therefore, this patch adds a workers’ equivalent, `worker_threads_caught_up`, that matches the behaviour of that for the (main) SQL thread. Reviewed-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com> Acked-by: Kristian Nielsen <knielsen@knielsen-hq.org>
1 parent 8a45128 commit f7ba169

File tree

4 files changed

+61
-27
lines changed

4 files changed

+61
-27
lines changed

sql/rpl_parallel.cc

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3069,13 +3069,21 @@ rpl_parallel::stop_during_until()
30693069
}
30703070

30713071

3072-
bool
3073-
rpl_parallel::workers_idle(Relay_log_info *rli)
3072+
bool Relay_log_info::are_sql_threads_caught_up()
30743073
{
3075-
mysql_mutex_assert_owner(&rli->data_lock);
3076-
return !rli->last_inuse_relaylog ||
3077-
rli->last_inuse_relaylog->queued_count ==
3078-
rli->last_inuse_relaylog->dequeued_count;
3074+
mysql_mutex_assert_owner(&data_lock);
3075+
if (!sql_thread_caught_up)
3076+
return false;
3077+
/*
3078+
The SQL thread sets @ref worker_threads_caught_up to `false` but not `true`.
3079+
Therefore, this place needs to check if it can now be `true`.
3080+
*/
3081+
if (!worker_threads_caught_up && ( // No need to re-check if already `true`.
3082+
!last_inuse_relaylog || // `nullptr` case
3083+
last_inuse_relaylog->queued_count == last_inuse_relaylog->dequeued_count
3084+
))
3085+
worker_threads_caught_up= true; // Refresh
3086+
return worker_threads_caught_up;
30793087
}
30803088

30813089

sql/rpl_parallel.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,8 +510,6 @@ struct rpl_parallel {
510510
void stop_during_until();
511511
int wait_for_workers_idle(THD *thd);
512512
int do_event(rpl_group_info *serial_rgi, Log_event *ev, ulonglong event_size);
513-
514-
static bool workers_idle(Relay_log_info *rli);
515513
};
516514

517515

sql/rpl_rli.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,29 @@ class Relay_log_info : public Slave_reporting_capability
258258
Seconds_Behind_Master as zero while the SQL thread is so waiting.
259259
*/
260260
bool sql_thread_caught_up;
261+
/**
262+
Simple setter for @ref worker_threads_caught_up;
263+
sets it `false` to to indicate new user events in queue
264+
@pre @ref data_lock held to prevent race with is_threads_caught_up()
265+
*/
266+
inline void unset_worker_threads_caught_up()
267+
{
268+
mysql_mutex_assert_owner(&data_lock);
269+
worker_threads_caught_up= false;
270+
}
271+
/**
272+
@return
273+
`true` if both @ref sql_thread_caught_up and (refresh according to
274+
@ref last_inuse_relaylog as needed) @ref worker_threads_caught_up
275+
@pre Only meaningful if `mi->using_parallel()`
276+
@pre @ref data_lock held to prevent race condition
277+
@note
278+
Parallel replication requires the idleness of the main SQL thread as well,
279+
because after the thread sets its state to "busy" with `data_lock` held,
280+
it enqueues events *without this lock*. Not to mention any event the main
281+
thread processes itself without distribution, e.g., ignored ones.
282+
*/
283+
bool are_sql_threads_caught_up();
261284

262285
void clear_until_condition();
263286
/**
@@ -588,6 +611,22 @@ class Relay_log_info : public Slave_reporting_capability
588611
relay log.
589612
*/
590613
uint32 m_flags;
614+
615+
/**
616+
When `true`, this worker threads' copy of @ref sql_thread_caught_up
617+
represents that __every__ worker thread is waiting for new events.
618+
* The SQL driver thread sets this to `false` through
619+
unset_worker_threads_caught_up() as it prepares an event
620+
(either to enqueue a worker or, e.g., ignored events, process itself)
621+
* For the main driver or any worker thread to refresh this state immediately
622+
when it finishes, the procedure would have to be a critical section.
623+
To avoid depending on a mutex, this state instead only returns to `true`
624+
as part of its reader, are_worker_threads_caught_up().
625+
`Seconds_Behind_Master` of SHOW SLAVE STATUS uses this method (which also
626+
reads `sql_thread_caught_up`) to know when all SQL threads are waiting.
627+
@pre Only meaningful if `mi->using_parallel()`
628+
*/
629+
bool worker_threads_caught_up= true;
591630
};
592631

593632

sql/slave.cc

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3312,21 +3312,10 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
33123312

33133313
if (!stamp)
33143314
idle= true;
3315+
else if (mi->using_parallel())
3316+
idle= mi->rli.are_sql_threads_caught_up();
33153317
else
3316-
{
33173318
idle= mi->rli.sql_thread_caught_up;
3318-
3319-
/*
3320-
The idleness of the SQL thread is needed for the parallel slave
3321-
because events can be ignored before distribution to a worker thread.
3322-
That is, Seconds_Behind_Master should still be calculated and visible
3323-
while the slave is processing ignored events, such as those skipped
3324-
due to slave_skip_counter.
3325-
*/
3326-
if (mi->using_parallel() && idle &&
3327-
!rpl_parallel::workers_idle(&mi->rli))
3328-
idle= false;
3329-
}
33303319
if (idle)
33313320
time_diff= 0;
33323321
else
@@ -4441,20 +4430,19 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
44414430
if (rli->mi->using_parallel())
44424431
{
44434432
/*
4444-
rli->sql_thread_caught_up is checked and negated here to ensure that
4433+
Relay_log_info::are_sql_threads_caught_up()
4434+
is checked and its states are negated here to ensure that
44454435
the value of Seconds_Behind_Master in SHOW SLAVE STATUS is consistent
44464436
with the update of last_master_timestamp. It was previously unset
44474437
immediately after reading an event from the relay log; however, for the
44484438
duration between that unset and the time that LMT would be updated
44494439
could lead to spikes in SBM.
44504440
4451-
The check for queued_count == dequeued_count ensures the worker threads
4452-
are all idle (i.e. all events have been executed).
4441+
The check also ensures the worker threads
4442+
are all practically idle (i.e. all user events have been executed).
44534443
*/
44544444
if ((unlikely(rli->last_master_timestamp == 0) ||
4455-
(rli->sql_thread_caught_up &&
4456-
(rli->last_inuse_relaylog->queued_count ==
4457-
rli->last_inuse_relaylog->dequeued_count))) &&
4445+
rli->are_sql_threads_caught_up()) &&
44584446
event_can_update_last_master_timestamp(ev))
44594447
{
44604448
/*
@@ -4469,6 +4457,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
44694457
rli->last_master_timestamp= ev->when;
44704458
}
44714459
rli->sql_thread_caught_up= false;
4460+
rli->unset_worker_threads_caught_up();
44724461
}
44734462

44744463
int res= rli->parallel.do_event(serial_rgi, ev, event_size);

0 commit comments

Comments
 (0)