Skip to content

Commit

Permalink
Revert "MDEV-23328 Server hang due to Galera lock conflict resolution" and
Browse files · Browse the repository at this point in the history

Revert "MDEV-24873 galera.galera_as_slave_ctas MTR failed:..."

This reverts commit 29bbcac and
later commit 5ecaf52.
  • Loading branch information
Jan Lindström committed Sep 24, 2021
1 parent 1cb218c commit 9d97f92
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 112 deletions.
2 changes: 2 additions & 0 deletions sql/wsrep_mysqld.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2762,7 +2762,9 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
{
if (signal)
{
mysql_mutex_lock(&thd->LOCK_thd_data);
thd->awake(KILL_QUERY);
mysql_mutex_unlock(&thd->LOCK_thd_data);
}
else
{
Expand Down
184 changes: 72 additions & 112 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ this program; if not, write to the Free Software Foundation, Inc.,

#include <my_service_manager.h>
#include <key.h>
#include <sql_manager.h>

/* Include necessary InnoDB headers */
#include "btr0btr.h"
Expand Down Expand Up @@ -19536,68 +19535,61 @@ wsrep_abort_slave_trx(
(long long)bf_seqno, (long long)victim_seqno);
abort();
}

/** Argument block handed from wsrep_innobase_kill_one_trx() to the
bg_wsrep_kill_trx() task through mysql_manager_submit(). It is allocated
with malloc() by the submitter and released with free() by the task. */
struct bg_wsrep_kill_trx_arg {
/** Thread id of the victim, taken from victim_trx->mysql_thd; the task
looks the THD up again with find_thread_by_id(). */
my_thread_id thd_id;
/** Id of the victim transaction at submit time; the task compares it
with the id of the transaction it finds on the THD and gives up when
they differ. */
trx_id_t trx_id;
/** Value of wsrep_thd_trx_seqno() for the BF thread at submit time. */
int64_t bf_seqno;
/** Forwarded unchanged as the second argument of wsrep_thd_awake(). */
ibool signal;
};

static void bg_wsrep_kill_trx(
void *void_arg)
/*******************************************************************//**
This function is used to kill one transaction in BF. */
UNIV_INTERN
void
wsrep_innobase_kill_one_trx(
/*========================*/
MYSQL_THD const bf_thd,
const trx_t * const bf_trx,
trx_t *victim_trx,
ibool signal)
{
bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)void_arg;
THD *thd = find_thread_by_id(arg->thd_id, false);
trx_t *victim_trx = NULL;
bool awake = false;
DBUG_ENTER("bg_wsrep_kill_trx");
ut_ad(bf_thd);
ut_ad(victim_trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(victim_trx));

if (thd) {
victim_trx= thd_to_trx(thd);
/* Victim trx might not exist e.g. on MDL-conflict. */
if (victim_trx) {
lock_mutex_enter();
trx_mutex_enter(victim_trx);
if (victim_trx->id != arg->trx_id ||
victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
{
/* Victim was meanwhile rolled back or
committed */
trx_mutex_exit(victim_trx);
lock_mutex_exit();
wsrep_thd_UNLOCK(thd);
victim_trx= NULL;
}
} else {
/* find_thread_by_id locked
THD::LOCK_thd_data */
wsrep_thd_UNLOCK(thd);
}
}
DBUG_ENTER("wsrep_innobase_kill_one_trx");
THD *thd = (THD *) victim_trx->mysql_thd;
int64_t bf_seqno = wsrep_thd_trx_seqno(bf_thd);

if (!victim_trx) {
/* Victim trx might not exist (MDL-conflict) or victim
was meanwhile rolled back or committed because of
a KILL statement or a disconnect. */
goto ret;
if (!thd) {
DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
DBUG_VOID_RETURN;
}

WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);

WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF
"), victim: (%lu) trx: " TRX_ID_FMT,
arg->signal, arg->bf_seqno,
signal, bf_seqno,
thd_get_thread_id(thd),
victim_trx->id);

WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64,
(wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
(thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
wsrep_thd_conflict_state(thd, FALSE),
wsrep_thd_ws_handle(thd)->trx_id);

wsrep_thd_LOCK(thd);
DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
{
const char act[]=
"now "
"wait_for signal.wsrep_after_BF_victim_lock";
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
STRING_WITH_LEN(act)));
};);


if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT,
victim_trx->id);
goto ret_unlock;
wsrep_thd_UNLOCK(thd);
DBUG_VOID_RETURN;
}

if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
Expand All @@ -19613,13 +19605,18 @@ static void bg_wsrep_kill_trx(
case MUST_ABORT:
WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
victim_trx->id);
goto ret_awake;
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
DBUG_VOID_RETURN;
break;
case ABORTED:
case ABORTING: // fall through
default:
WSREP_DEBUG("victim " TRX_ID_FMT " in state %d",
victim_trx->id, wsrep_thd_get_conflict_state(thd));
goto ret_unlock;
wsrep_thd_UNLOCK(thd);
DBUG_VOID_RETURN;
break;
}

switch (wsrep_thd_query_state(thd)) {
Expand All @@ -19632,12 +19629,12 @@ static void bg_wsrep_kill_trx(
victim_trx->id);

if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
wsrep_abort_slave_trx(arg->bf_seqno,
wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
} else {
wsrep_t *wsrep= get_wsrep();
rcode = wsrep->abort_pre_commit(
wsrep, arg->bf_seqno,
wsrep, bf_seqno,
(wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id
);

Expand All @@ -19646,7 +19643,10 @@ static void bg_wsrep_kill_trx(
WSREP_DEBUG("cancel commit warning: "
TRX_ID_FMT,
victim_trx->id);
goto ret_awake;
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
DBUG_VOID_RETURN;
break;
case WSREP_OK:
break;
default:
Expand All @@ -19659,9 +19659,12 @@ static void bg_wsrep_kill_trx(
* kill the lock holder first.
*/
abort();
break;
}
}
goto ret_awake;
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
break;
case QUERY_EXEC:
/* it is possible that victim trx is itself waiting for some
* other lock. We need to cancel this waiting
Expand All @@ -19682,30 +19685,37 @@ static void bg_wsrep_kill_trx(
lock_cancel_waiting_and_release(wait_lock);
}

wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);
} else {
/* abort currently executing query */
DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu",
thd_get_thread_id(thd)));
WSREP_DEBUG("kill query for: %ld",
thd_get_thread_id(thd));
/* Note that innobase_kill_query will take lock_mutex
and trx_mutex */
wsrep_thd_UNLOCK(thd);
wsrep_thd_awake(thd, signal);

/* for BF thd, we need to prevent him from committing */
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
wsrep_abort_slave_trx(arg->bf_seqno,
wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
}
}
goto ret_awake;
break;
case QUERY_IDLE:
{
WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id);

if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
WSREP_DEBUG("kill BF IDLE, seqno: %lld",
(long long)wsrep_thd_trx_seqno(thd));
wsrep_abort_slave_trx(arg->bf_seqno,
wsrep_thd_UNLOCK(thd);
wsrep_abort_slave_trx(bf_seqno,
wsrep_thd_trx_seqno(thd));
goto ret_unlock;
DBUG_VOID_RETURN;
}
/* This will lock thd from proceeding after net_read() */
wsrep_thd_set_conflict_state(thd, ABORTING);
Expand All @@ -19726,67 +19736,17 @@ static void bg_wsrep_kill_trx(
DBUG_PRINT("wsrep",("signalling wsrep rollbacker"));
WSREP_DEBUG("signaling aborter");
wsrep_unlock_rollback();
goto ret_unlock;
wsrep_thd_UNLOCK(thd);

break;
}
default:
WSREP_WARN("bad wsrep query state: %d",
wsrep_thd_query_state(thd));
goto ret_unlock;
wsrep_thd_UNLOCK(thd);
break;
}

ret_awake:
awake= true;

ret_unlock:
trx_mutex_exit(victim_trx);
lock_mutex_exit();
if (awake)
wsrep_thd_awake(thd, arg->signal);
wsrep_thd_UNLOCK(thd);

ret:
free(arg);
DBUG_VOID_RETURN;

}

/*******************************************************************//**
This function is used to kill one transaction in BF.

This variant does not abort the victim itself: it records the victim's
thread id, the victim's transaction id, the BF thread's seqno and the
signal flag in a heap-allocated bg_wsrep_kill_trx_arg and submits
bg_wsrep_kill_trx() with that argument via mysql_manager_submit().

The ut_ad() checks state that the caller passes non-NULL bf_thd and
victim_trx and owns both the lock mutex and the victim's trx mutex.

@param[in]	bf_thd		THD of the BF side; source of bf_seqno and target
				of the debug-sync action
@param[in]	bf_trx		not referenced in this function body
@param[in]	victim_trx	transaction to be killed; its mysql_thd and id
				are captured into the task argument
@param[in]	signal		copied into the task argument */
UNIV_INTERN
void
wsrep_innobase_kill_one_trx(
/*========================*/
MYSQL_THD const bf_thd,
const trx_t * const bf_trx,
trx_t *victim_trx,
ibool signal)
{
ut_ad(bf_thd);
ut_ad(victim_trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(victim_trx));

/* Snapshot everything the background task needs, so that it does not
have to use the victim_trx pointer passed in here.
NOTE(review): the malloc() result is dereferenced without a NULL
check - confirm the intended out-of-memory policy. */
bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg));
arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd);
arg->trx_id = victim_trx->id;
arg->bf_seqno = wsrep_thd_trx_seqno((THD*)bf_thd);
arg->signal = signal;

DBUG_ENTER("wsrep_innobase_kill_one_trx");

WSREP_LOG_CONFLICT(bf_thd, victim_trx->mysql_thd, TRUE);

/* Test hook: when the debug keyword "sync.wsrep_after_BF_victim_lock"
is enabled, install a debug-sync action on bf_thd that waits for
signal.wsrep_after_BF_victim_lock. */
DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
{
const char act[]=
"now "
"wait_for signal.wsrep_after_BF_victim_lock";
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
STRING_WITH_LEN(act)));
};);


/* Hand the argument over to the submitted task; bg_wsrep_kill_trx()
calls free() on it. */
mysql_manager_submit(bg_wsrep_kill_trx, arg);
DBUG_VOID_RETURN;
}

Expand Down Expand Up @@ -19821,8 +19781,8 @@ wsrep_abort_transaction(
WSREP_DEBUG("victim does not have transaction");
wsrep_thd_LOCK(victim_thd);
wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
wsrep_thd_awake(victim_thd, signal);
wsrep_thd_UNLOCK(victim_thd);
wsrep_thd_awake(victim_thd, signal);
}

DBUG_VOID_RETURN;
Expand Down

0 comments on commit 9d97f92

Please sign in to comment.