Skip to content

Commit 50f19ca

Browse files
committed
Remove unnecessary global mutex in parallel replication.
The function apply_event_and_update_pos() is called with the rli->data_lock mutex held. However, there seems to be nothing in the function actually needing the mutex to be held. Certainly not in the parallel replication case, where sql_slave_skip_counter is always 0 since the non-zero case is handled by the SQL driver thread. So this patch makes parallel replication use a variant of apply_event_and_update_pos() without the need to take the rli->data_lock mutex. This avoids one contended global mutex for each event executed, which might improve performance on CPU-bound workloads somewhat. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
1 parent 7e0c9de commit 50f19ca

File tree

3 files changed

+96
-43
lines changed

3 files changed

+96
-43
lines changed

sql/rpl_parallel.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,7 @@ rpt_handle_event(rpl_parallel_thread::queued_event *qev,
4747
if (!(ev->is_artificial_event() || ev->is_relay_log_event() ||
4848
(ev->when == 0)))
4949
rgi->last_master_timestamp= ev->when + (time_t)ev->exec_time;
50-
mysql_mutex_lock(&rli->data_lock);
51-
/* Mutex will be released in apply_event_and_update_pos(). */
52-
err= apply_event_and_update_pos(ev, thd, rgi, rpt);
50+
err= apply_event_and_update_pos_for_parallel(ev, thd, rgi);
5351

5452
thread_safe_increment64(&rli->executed_entries);
5553
/* ToDo: error handling. */

sql/slave.cc

Lines changed: 92 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3377,39 +3377,17 @@ has_temporary_error(THD *thd)
33773377
}
33783378

33793379

3380-
/**
3381-
Applies the given event and advances the relay log position.
3382-
3383-
In essence, this function does:
3384-
3385-
@code
3386-
ev->apply_event(rli);
3387-
ev->update_pos(rli);
3388-
@endcode
3389-
3390-
But it also does some maintenance, such as skipping events if
3391-
needed and reporting errors.
3392-
3393-
If the @c skip flag is set, then it is tested whether the event
3394-
should be skipped, by looking at the slave_skip_counter and the
3395-
server id. The skip flag should be set when calling this from a
3396-
replication thread but not set when executing an explicit BINLOG
3397-
statement.
3398-
3399-
@retval 0 OK.
3400-
3401-
@retval 1 Error calling ev->apply_event().
3380+
/*
3381+
First half of apply_event_and_update_pos(), see below.
3382+
Setup some THD variables for applying the event.
34023383
3403-
@retval 2 No error calling ev->apply_event(), but error calling
3404-
ev->update_pos().
3384+
Split out so that it can run with rli->data_lock held in non-parallel
3385+
replication, but without the mutex held in the parallel case.
34053386
*/
3406-
int apply_event_and_update_pos(Log_event* ev, THD* thd,
3407-
rpl_group_info *rgi,
3408-
rpl_parallel_thread *rpt)
3387+
static int
3388+
apply_event_and_update_pos_setup(Log_event* ev, THD* thd, rpl_group_info *rgi)
34093389
{
3410-
int exec_res= 0;
3411-
Relay_log_info* rli= rgi->rli;
3412-
DBUG_ENTER("apply_event_and_update_pos");
3390+
DBUG_ENTER("apply_event_and_update_pos_setup");
34133391

34143392
DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)",
34153393
ev->get_type_str(), ev->get_type_code(),
@@ -3459,13 +3437,23 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd,
34593437
(ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0);
34603438
ev->thd = thd; // because up to this point, ev->thd == 0
34613439

3462-
int reason= ev->shall_skip(rgi);
3463-
if (reason == Log_event::EVENT_SKIP_COUNT)
3464-
{
3465-
DBUG_ASSERT(rli->slave_skip_counter > 0);
3466-
rli->slave_skip_counter--;
3467-
}
3468-
mysql_mutex_unlock(&rli->data_lock);
3440+
DBUG_RETURN(ev->shall_skip(rgi));
3441+
}
3442+
3443+
3444+
/*
3445+
Second half of apply_event_and_update_pos(), see below.
3446+
3447+
Do the actual event apply (or skip), and position update.
3448+
*/
3449+
static int
3450+
apply_event_and_update_pos_apply(Log_event* ev, THD* thd, rpl_group_info *rgi,
3451+
int reason)
3452+
{
3453+
int exec_res= 0;
3454+
Relay_log_info* rli= rgi->rli;
3455+
3456+
DBUG_ENTER("apply_event_and_update_pos_apply");
34693457
DBUG_EXECUTE_IF("inject_slave_sql_before_apply_event",
34703458
{
34713459
DBUG_ASSERT(!debug_sync_set_action
@@ -3553,6 +3541,72 @@ int apply_event_and_update_pos(Log_event* ev, THD* thd,
35533541
}
35543542

35553543

3544+
/**
3545+
Applies the given event and advances the relay log position.
3546+
3547+
In essence, this function does:
3548+
3549+
@code
3550+
ev->apply_event(rli);
3551+
ev->update_pos(rli);
3552+
@endcode
3553+
3554+
But it also does some maintenance, such as skipping events if
3555+
needed and reporting errors.
3556+
3557+
If the @c skip flag is set, then it is tested whether the event
3558+
should be skipped, by looking at the slave_skip_counter and the
3559+
server id. The skip flag should be set when calling this from a
3560+
replication thread but not set when executing an explicit BINLOG
3561+
statement.
3562+
3563+
@retval 0 OK.
3564+
3565+
@retval 1 Error calling ev->apply_event().
3566+
3567+
@retval 2 No error calling ev->apply_event(), but error calling
3568+
ev->update_pos().
3569+
3570+
This function is only used in non-parallel replication, where it is called
3571+
with rli->data_lock held; this lock is released during this function.
3572+
*/
3573+
int
3574+
apply_event_and_update_pos(Log_event* ev, THD* thd, rpl_group_info *rgi)
3575+
{
3576+
Relay_log_info* rli= rgi->rli;
3577+
mysql_mutex_assert_owner(&rli->data_lock);
3578+
int reason= apply_event_and_update_pos_setup(ev, thd, rgi);
3579+
if (reason == Log_event::EVENT_SKIP_COUNT)
3580+
{
3581+
DBUG_ASSERT(rli->slave_skip_counter > 0);
3582+
rli->slave_skip_counter--;
3583+
}
3584+
mysql_mutex_unlock(&rli->data_lock);
3585+
return apply_event_and_update_pos_apply(ev, thd, rgi, reason);
3586+
}
3587+
3588+
3589+
/*
3590+
The version of above apply_event_and_update_pos() used in parallel
3591+
replication. Unlike the non-parallel case, this function is called without
3592+
rli->data_lock held.
3593+
*/
3594+
int
3595+
apply_event_and_update_pos_for_parallel(Log_event* ev, THD* thd,
3596+
rpl_group_info *rgi)
3597+
{
3598+
Relay_log_info* rli= rgi->rli;
3599+
mysql_mutex_assert_not_owner(&rli->data_lock);
3600+
int reason= apply_event_and_update_pos_setup(ev, thd, rgi);
3601+
/*
3602+
In parallel replication, sql_slave_skip_counter is handled in the SQL
3603+
driver thread, so we should never see EVENT_SKIP_COUNT here.
3604+
*/
3605+
DBUG_ASSERT(reason != Log_event::EVENT_SKIP_COUNT);
3606+
return apply_event_and_update_pos_apply(ev, thd, rgi, reason);
3607+
}
3608+
3609+
35563610
/**
35573611
Keep the relay log transaction state up to date.
35583612
@@ -3803,7 +3857,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
38033857
serial_rgi->future_event_relay_log_pos= rli->future_event_relay_log_pos;
38043858
serial_rgi->event_relay_log_name= rli->event_relay_log_name;
38053859
serial_rgi->event_relay_log_pos= rli->event_relay_log_pos;
3806-
exec_res= apply_event_and_update_pos(ev, thd, serial_rgi, NULL);
3860+
exec_res= apply_event_and_update_pos(ev, thd, serial_rgi);
38073861

38083862
#ifdef WITH_WSREP
38093863
WSREP_DEBUG("apply_event_and_update_pos() result: %d", exec_res);

sql/slave.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,9 @@ void set_slave_thread_default_charset(THD *thd, rpl_group_info *rgi);
243243
int rotate_relay_log(Master_info* mi);
244244
int has_temporary_error(THD *thd);
245245
int apply_event_and_update_pos(Log_event* ev, THD* thd,
246-
struct rpl_group_info *rgi,
247-
rpl_parallel_thread *rpt);
246+
struct rpl_group_info *rgi);
247+
int apply_event_and_update_pos_for_parallel(Log_event* ev, THD* thd,
248+
struct rpl_group_info *rgi);
248249

249250
pthread_handler_t handle_slave_io(void *arg);
250251
void slave_output_error_info(rpl_group_info *rgi, THD *thd);

0 commit comments

Comments
 (0)