From 9856bb4245177cb290f771f1010299acf221c869 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Sun, 2 Jul 2023 21:16:03 +0200 Subject: [PATCH] MDEV-31602: Race on rpl_global_gtid_slave_state when starting IO thread Fix that rpl_slave_state::load() was calling rpl_slave_state::update() without holding LOCK_slave_state. Reviewed-by: Monty Signed-off-by: Kristian Nielsen --- sql/rpl_gtid.cc | 17 ++++++++++++++--- sql/rpl_gtid.h | 2 ++ sql/rpl_rli.cc | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 6f2c01383abd8..e7ff89248747f 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -45,9 +45,7 @@ rpl_slave_state::update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton, there will not be an attempt to delete the corresponding table row before it is even committed. */ - mysql_mutex_lock(&LOCK_slave_state); err= update(gtid->domain_id, gtid->server_id, sub_id, gtid->seq_no, hton, rgi); - mysql_mutex_unlock(&LOCK_slave_state); if (err) { sql_print_warning("Slave: Out of memory during slave state maintenance. " @@ -290,11 +288,24 @@ rpl_slave_state::truncate_hash() int rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, uint64 seq_no, void *hton, rpl_group_info *rgi) +{ + int res; + mysql_mutex_lock(&LOCK_slave_state); + res= update_nolock(domain_id, server_id, sub_id, seq_no, hton, rgi); + mysql_mutex_unlock(&LOCK_slave_state); + return res; +} + + +int +rpl_slave_state::update_nolock(uint32 domain_id, uint32 server_id, uint64 sub_id, + uint64 seq_no, void *hton, rpl_group_info *rgi) { element *elem= NULL; list_element *list_elem= NULL; DBUG_ASSERT(hton || !loaded); + mysql_mutex_assert_owner(&LOCK_slave_state); if (!(elem= get_element(domain_id))) return 1; @@ -308,7 +319,6 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, of all pending MASTER_GTID_WAIT(), so we do not slow down the replication SQL thread. */ - mysql_mutex_assert_owner(&LOCK_slave_state); elem->gtid_waiter= NULL; mysql_cond_broadcast(&elem->COND_wait_gtid); } @@ -1356,6 +1366,7 @@ rpl_slave_state::load(THD *thd, const char *state_from_master, size_t len, { const char *end= state_from_master + len; + mysql_mutex_assert_not_owner(&LOCK_slave_state); if (reset) { if (truncate_state_table(thd)) diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index 081f7e309f466..1a2fb2bae85ce 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -231,6 +231,8 @@ struct rpl_slave_state ulong count() const { return hash.records; } int update(uint32 domain_id, uint32 server_id, uint64 sub_id, uint64 seq_no, void *hton, rpl_group_info *rgi); + int update_nolock(uint32 domain_id, uint32 server_id, uint64 sub_id, + uint64 seq_no, void *hton, rpl_group_info *rgi); int truncate_state_table(THD *thd); void select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename); int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 04fddb3e74bff..214650f1e292d 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1927,7 +1927,7 @@ rpl_load_gtid_slave_state(THD *thd) for (i= 0; i < array.elements; ++i) { get_dynamic(&array, (uchar *)&tmp_entry, i); - if ((err= rpl_global_gtid_slave_state->update(tmp_entry.gtid.domain_id, + if ((err= rpl_global_gtid_slave_state->update_nolock(tmp_entry.gtid.domain_id, tmp_entry.gtid.server_id, tmp_entry.sub_id, tmp_entry.gtid.seq_no,