Skip to content
Permalink
Browse files
MDEV-16005 sporadic failures with galera tests MW-328B and MW-328C
These test can sporadically show mutex deadlock warnings between LOCK_wsrep_thd
and LOCK_thd_data mutexes. This means that these mutexes can be locked in opposite
order by different threads, and thus result in deadlock situation.
To fix such issue, the locking policy of these mutexes should be revised and
enforced to be uniform. However, a quick code review shows that the number of
lock/unlock operations for these mutexes combined is between 100-200, and all these
mutex invocations should be checked/fixed.

On the other hand, it turns out that LOCK_wsrep_thd is used for protecting access to
wsrep variables of THD (wsrep_conflict_state, wsrep_query_state), whereas LOCK_thd_data
protects query, db and mysys_var variables in THD. Extending LOCK_thd_data to protect
also wsrep variables looks like a viable solution, as there should not be a use case
where separate threads need simultaneous access to wsrep variables and THD data variables.

In this commit LOCK_wsrep_thd mutex is refactored to be replaced by LOCK_thd_data.
By bluntly replacing LOCK_wsrep_thd by LOCK_thd_data, will result in double locking
of LOCK_thd_data, and some adjustements have been performed to fix such situations.
  • Loading branch information
sjaakola committed Apr 24, 2018
1 parent 82d4f08 commit 2f0b8f3
Show file tree
Hide file tree
Showing 14 changed files with 94 additions and 106 deletions.
@@ -3898,10 +3898,10 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
DBUG_RETURN(1);

#ifdef WITH_WSREP
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
if (thd->wsrep_conflict_state == NO_CONFLICT)
{
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
#endif /* WITH_WSREP */
if (slave_trans_retries)
{
@@ -3978,7 +3978,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
#ifdef WITH_WSREP
}
else
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
#endif /* WITH_WSREP */

thread_safe_increment64(&rli->executed_entries);
@@ -1008,7 +1008,6 @@ THD::THD(bool is_wsrep_applier)
*scramble= '\0';

#ifdef WITH_WSREP
mysql_mutex_init(key_LOCK_wsrep_thd, &LOCK_wsrep_thd, MY_MUTEX_INIT_FAST);
wsrep_ws_handle.trx_id = WSREP_UNDEFINED_TRX_ID;
wsrep_ws_handle.opaque = NULL;
wsrep_retry_counter = 0;
@@ -1644,9 +1643,6 @@ THD::~THD()
mysql_mutex_unlock(&LOCK_thd_data);

#ifdef WITH_WSREP
mysql_mutex_lock(&LOCK_wsrep_thd);
mysql_mutex_unlock(&LOCK_wsrep_thd);
mysql_mutex_destroy(&LOCK_wsrep_thd);
if (wsrep_rgi) delete wsrep_rgi;
#endif
/* Close connection */
@@ -4105,7 +4105,6 @@ class THD :public Statement,
query_id_t wsrep_last_query_id;
enum wsrep_query_state wsrep_query_state;
enum wsrep_conflict_state wsrep_conflict_state;
mysql_mutex_t LOCK_wsrep_thd;
wsrep_trx_meta_t wsrep_trx_meta;
uint32 wsrep_rand;
Relay_log_info *wsrep_rli;
@@ -1335,9 +1335,9 @@ void do_handle_one_connection(THD *thd_arg)
#ifdef WITH_WSREP
if (WSREP(thd))
{
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->wsrep_query_state= QUERY_EXITING;
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif
end_thread:
@@ -4388,16 +4388,16 @@ bool select_create::send_eof()
#ifdef WITH_WSREP
if (WSREP_ON)
{
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
if (thd->wsrep_conflict_state != NO_CONFLICT)
{
WSREP_DEBUG("select_create commit failed, thd: %lu err: %d %s",
thd->thread_id, thd->wsrep_conflict_state, thd->query());
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
abort_result_set();
DBUG_RETURN(true);
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */
}
@@ -939,13 +939,13 @@ bool do_command(THD *thd)
#ifdef WITH_WSREP
if (WSREP(thd))
{
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->wsrep_query_state= QUERY_IDLE;
if (thd->wsrep_conflict_state==MUST_ABORT)
{
wsrep_client_rollback(thd);
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */

@@ -991,15 +991,15 @@ bool do_command(THD *thd)
packet_length= my_net_read_packet(net, 1);
#ifdef WITH_WSREP
if (WSREP(thd)) {
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);

/* these THD's are aborted or are aborting during being idle */
if (thd->wsrep_conflict_state == ABORTING)
{
while (thd->wsrep_conflict_state == ABORTING) {
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
my_sleep(1000);
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
}
thd->store_globals();
}
@@ -1009,7 +1009,7 @@ bool do_command(THD *thd)
}

thd->wsrep_query_state= QUERY_EXEC;
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */

@@ -1022,13 +1022,13 @@ bool do_command(THD *thd)
#ifdef WITH_WSREP
if (WSREP(thd))
{
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
if (thd->wsrep_conflict_state == MUST_ABORT)
{
DBUG_PRINT("wsrep",("aborted for wsrep rollback: %lu", thd->real_id));
wsrep_client_rollback(thd);
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */

@@ -1256,7 +1256,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
thd->wsrep_PA_safe= true;
}

mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->wsrep_query_state= QUERY_EXEC;
if (thd->wsrep_conflict_state== RETRY_AUTOCOMMIT)
{
@@ -1272,14 +1272,14 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
{
my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction");
WSREP_DEBUG("Deadlock error for: %s", thd->query());
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
thd->reset_killed();
thd->mysys_var->abort = 0;
thd->wsrep_conflict_state = NO_CONFLICT;
thd->wsrep_retry_counter = 0;
goto dispatch_end;
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */
#if defined(ENABLED_PROFILING)
@@ -1934,10 +1934,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
DBUG_ASSERT((command != COM_QUIT && command != COM_STMT_CLOSE)
|| thd->get_stmt_da()->is_disabled());
/* wsrep BF abort in query exec phase */
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
do_end_of_statement= thd->wsrep_conflict_state != REPLAYING &&
thd->wsrep_conflict_state != RETRY_AUTOCOMMIT;
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
else
do_end_of_statement= true;
@@ -7192,7 +7192,7 @@ static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,

if (WSREP(thd)) {
/* wsrep BF abort in query exec phase */
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
if (thd->wsrep_conflict_state == MUST_ABORT) {
wsrep_client_rollback(thd);

@@ -7201,8 +7201,11 @@ static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,

if (thd->wsrep_conflict_state == MUST_REPLAY)
{
mysql_mutex_unlock(&thd->LOCK_thd_data);
if (thd->lex->explain)
delete_explain_query(thd->lex);
mysql_mutex_lock(&thd->LOCK_thd_data);

wsrep_replay_transaction(thd);
}

@@ -7242,13 +7245,13 @@ static void wsrep_mysql_parse(THD *thd, char *rawbuf, uint length,
if (thd->wsrep_conflict_state != REPLAYING)
thd->wsrep_retry_counter= 0; // reset
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
thd->reset_killed();
}
else
{
set_if_smaller(thd->wsrep_retry_counter, 0); // reset; eventually ok
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
}

@@ -8207,7 +8210,7 @@ kill_one_thread(THD *thd, longlong id, killed_state kill_signal, killed_type typ

if (((thd->security_ctx->master_access & SUPER_ACL) ||
thd->security_ctx->user_matches(tmp->security_ctx)) &&
!wsrep_thd_is_BF(tmp, true))
!wsrep_thd_is_BF(tmp, false))
{
tmp->awake(kill_signal);
error=0;
@@ -3920,7 +3920,7 @@ Prepared_statement::execute_loop(String *expanded_query,

if (WSREP_ON)
{
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
switch (thd->wsrep_conflict_state)
{
case CERT_FAILURE:
@@ -3936,7 +3936,7 @@ Prepared_statement::execute_loop(String *expanded_query,
default:
break;
}
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
#endif /* WITH_WSREP */

@@ -3958,7 +3958,6 @@ Prepared_statement::execute_loop(String *expanded_query,
return error;
}


bool
Prepared_statement::execute_server_runnable(Server_runnable *server_runnable)
{
@@ -98,11 +98,11 @@ static wsrep_cb_status_t wsrep_apply_events(THD* thd,
DBUG_RETURN(WSREP_CB_FAILURE);
}

mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->wsrep_query_state= QUERY_EXEC;
if (thd->wsrep_conflict_state!= REPLAYING)
thd->wsrep_conflict_state= NO_CONFLICT;
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);

if (!buf_len) WSREP_DEBUG("empty rbr buffer to apply: %lld",
(long long) wsrep_thd_trx_seqno(thd));
@@ -197,9 +197,9 @@ static wsrep_cb_status_t wsrep_apply_events(THD* thd,
}

error:
mysql_mutex_lock(&thd->LOCK_wsrep_thd);
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->wsrep_query_state= QUERY_IDLE;
mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
mysql_mutex_unlock(&thd->LOCK_thd_data);

assert(thd->wsrep_exec_mode== REPL_RECV);

0 comments on commit 2f0b8f3

Please sign in to comment.