Skip to content

Commit 9d97f92

Browse files
author
Jan Lindström
committed
Revert "MDEV-23328 Server hang due to Galera lock conflict resolution" and
Revert "MDEV-24873 galera.galera_as_slave_ctas MTR failed:..." This reverts commit 29bbcac and later commit 5ecaf52.
1 parent 1cb218c commit 9d97f92

File tree

2 files changed

74 additions and 112 deletions

sql/wsrep_mysqld.cc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2762,7 +2762,9 @@ extern "C" void wsrep_thd_awake(THD *thd, my_bool signal)
27622762
{
27632763
if (signal)
27642764
{
2765+
mysql_mutex_lock(&thd->LOCK_thd_data);
27652766
thd->awake(KILL_QUERY);
2767+
mysql_mutex_unlock(&thd->LOCK_thd_data);
27662768
}
27672769
else
27682770
{

storage/innobase/handler/ha_innodb.cc

Lines changed: 72 additions & 112 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
6060

6161
#include <my_service_manager.h>
6262
#include <key.h>
63-
#include <sql_manager.h>
6463

6564
/* Include necessary InnoDB headers */
6665
#include "btr0btr.h"
@@ -19536,68 +19535,61 @@ wsrep_abort_slave_trx(
1953619535
(long long)bf_seqno, (long long)victim_seqno);
1953719536
abort();
1953819537
}
19539-
19540-
struct bg_wsrep_kill_trx_arg {
19541-
my_thread_id thd_id;
19542-
trx_id_t trx_id;
19543-
int64_t bf_seqno;
19544-
ibool signal;
19545-
};
19546-
19547-
static void bg_wsrep_kill_trx(
19548-
void *void_arg)
19538+
/*******************************************************************//**
19539+
This function is used to kill one transaction in BF. */
19540+
UNIV_INTERN
19541+
void
19542+
wsrep_innobase_kill_one_trx(
19543+
/*========================*/
19544+
MYSQL_THD const bf_thd,
19545+
const trx_t * const bf_trx,
19546+
trx_t *victim_trx,
19547+
ibool signal)
1954919548
{
19550-
bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)void_arg;
19551-
THD *thd = find_thread_by_id(arg->thd_id, false);
19552-
trx_t *victim_trx = NULL;
19553-
bool awake = false;
19554-
DBUG_ENTER("bg_wsrep_kill_trx");
19549+
ut_ad(bf_thd);
19550+
ut_ad(victim_trx);
19551+
ut_ad(lock_mutex_own());
19552+
ut_ad(trx_mutex_own(victim_trx));
1955519553

19556-
if (thd) {
19557-
victim_trx= thd_to_trx(thd);
19558-
/* Victim trx might not exist e.g. on MDL-conflict. */
19559-
if (victim_trx) {
19560-
lock_mutex_enter();
19561-
trx_mutex_enter(victim_trx);
19562-
if (victim_trx->id != arg->trx_id ||
19563-
victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
19564-
{
19565-
/* Victim was meanwhile rolled back or
19566-
committed */
19567-
trx_mutex_exit(victim_trx);
19568-
lock_mutex_exit();
19569-
wsrep_thd_UNLOCK(thd);
19570-
victim_trx= NULL;
19571-
}
19572-
} else {
19573-
/* find_thread_by_id locked
19574-
THD::LOCK_thd_data */
19575-
wsrep_thd_UNLOCK(thd);
19576-
}
19577-
}
19554+
DBUG_ENTER("wsrep_innobase_kill_one_trx");
19555+
THD *thd = (THD *) victim_trx->mysql_thd;
19556+
int64_t bf_seqno = wsrep_thd_trx_seqno(bf_thd);
1957819557

19579-
if (!victim_trx) {
19580-
/* Victim trx might not exist (MDL-conflict) or victim
19581-
was meanwhile rolled back or committed because of
19582-
a KILL statement or a disconnect. */
19583-
goto ret;
19558+
if (!thd) {
19559+
DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
19560+
WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
19561+
DBUG_VOID_RETURN;
1958419562
}
1958519563

19564+
WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
19565+
1958619566
WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF
1958719567
"), victim: (%lu) trx: " TRX_ID_FMT,
19588-
arg->signal, arg->bf_seqno,
19568+
signal, bf_seqno,
1958919569
thd_get_thread_id(thd),
1959019570
victim_trx->id);
1959119571

1959219572
WSREP_DEBUG("Aborting query: %s conf %d trx: %" PRId64,
19593-
(wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
19573+
(thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void",
1959419574
wsrep_thd_conflict_state(thd, FALSE),
1959519575
wsrep_thd_ws_handle(thd)->trx_id);
1959619576

19577+
wsrep_thd_LOCK(thd);
19578+
DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
19579+
{
19580+
const char act[]=
19581+
"now "
19582+
"wait_for signal.wsrep_after_BF_victim_lock";
19583+
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
19584+
STRING_WITH_LEN(act)));
19585+
};);
19586+
19587+
1959719588
if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
1959819589
WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT,
1959919590
victim_trx->id);
19600-
goto ret_unlock;
19591+
wsrep_thd_UNLOCK(thd);
19592+
DBUG_VOID_RETURN;
1960119593
}
1960219594

1960319595
if (wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
@@ -19613,13 +19605,18 @@ static void bg_wsrep_kill_trx(
1961319605
case MUST_ABORT:
1961419606
WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
1961519607
victim_trx->id);
19616-
goto ret_awake;
19608+
wsrep_thd_UNLOCK(thd);
19609+
wsrep_thd_awake(thd, signal);
19610+
DBUG_VOID_RETURN;
19611+
break;
1961719612
case ABORTED:
1961819613
case ABORTING: // fall through
1961919614
default:
1962019615
WSREP_DEBUG("victim " TRX_ID_FMT " in state %d",
1962119616
victim_trx->id, wsrep_thd_get_conflict_state(thd));
19622-
goto ret_unlock;
19617+
wsrep_thd_UNLOCK(thd);
19618+
DBUG_VOID_RETURN;
19619+
break;
1962319620
}
1962419621

1962519622
switch (wsrep_thd_query_state(thd)) {
@@ -19632,12 +19629,12 @@ static void bg_wsrep_kill_trx(
1963219629
victim_trx->id);
1963319630

1963419631
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
19635-
wsrep_abort_slave_trx(arg->bf_seqno,
19632+
wsrep_abort_slave_trx(bf_seqno,
1963619633
wsrep_thd_trx_seqno(thd));
1963719634
} else {
1963819635
wsrep_t *wsrep= get_wsrep();
1963919636
rcode = wsrep->abort_pre_commit(
19640-
wsrep, arg->bf_seqno,
19637+
wsrep, bf_seqno,
1964119638
(wsrep_trx_id_t)wsrep_thd_ws_handle(thd)->trx_id
1964219639
);
1964319640

@@ -19646,7 +19643,10 @@ static void bg_wsrep_kill_trx(
1964619643
WSREP_DEBUG("cancel commit warning: "
1964719644
TRX_ID_FMT,
1964819645
victim_trx->id);
19649-
goto ret_awake;
19646+
wsrep_thd_UNLOCK(thd);
19647+
wsrep_thd_awake(thd, signal);
19648+
DBUG_VOID_RETURN;
19649+
break;
1965019650
case WSREP_OK:
1965119651
break;
1965219652
default:
@@ -19659,9 +19659,12 @@ static void bg_wsrep_kill_trx(
1965919659
* kill the lock holder first.
1966019660
*/
1966119661
abort();
19662+
break;
1966219663
}
1966319664
}
19664-
goto ret_awake;
19665+
wsrep_thd_UNLOCK(thd);
19666+
wsrep_thd_awake(thd, signal);
19667+
break;
1966519668
case QUERY_EXEC:
1966619669
/* it is possible that victim trx is itself waiting for some
1966719670
* other lock. We need to cancel this waiting
@@ -19682,30 +19685,37 @@ static void bg_wsrep_kill_trx(
1968219685
lock_cancel_waiting_and_release(wait_lock);
1968319686
}
1968419687

19688+
wsrep_thd_UNLOCK(thd);
19689+
wsrep_thd_awake(thd, signal);
1968519690
} else {
1968619691
/* abort currently executing query */
1968719692
DBUG_PRINT("wsrep",("sending KILL_QUERY to: %lu",
1968819693
thd_get_thread_id(thd)));
1968919694
WSREP_DEBUG("kill query for: %ld",
1969019695
thd_get_thread_id(thd));
19696+
/* Note that innobase_kill_query will take lock_mutex
19697+
and trx_mutex */
19698+
wsrep_thd_UNLOCK(thd);
19699+
wsrep_thd_awake(thd, signal);
1969119700

1969219701
/* for BF thd, we need to prevent him from committing */
1969319702
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
19694-
wsrep_abort_slave_trx(arg->bf_seqno,
19703+
wsrep_abort_slave_trx(bf_seqno,
1969519704
wsrep_thd_trx_seqno(thd));
1969619705
}
1969719706
}
19698-
goto ret_awake;
19707+
break;
1969919708
case QUERY_IDLE:
1970019709
{
1970119710
WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id);
1970219711

1970319712
if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
1970419713
WSREP_DEBUG("kill BF IDLE, seqno: %lld",
1970519714
(long long)wsrep_thd_trx_seqno(thd));
19706-
wsrep_abort_slave_trx(arg->bf_seqno,
19715+
wsrep_thd_UNLOCK(thd);
19716+
wsrep_abort_slave_trx(bf_seqno,
1970719717
wsrep_thd_trx_seqno(thd));
19708-
goto ret_unlock;
19718+
DBUG_VOID_RETURN;
1970919719
}
1971019720
/* This will lock thd from proceeding after net_read() */
1971119721
wsrep_thd_set_conflict_state(thd, ABORTING);
@@ -19726,67 +19736,17 @@ static void bg_wsrep_kill_trx(
1972619736
DBUG_PRINT("wsrep",("signalling wsrep rollbacker"));
1972719737
WSREP_DEBUG("signaling aborter");
1972819738
wsrep_unlock_rollback();
19729-
goto ret_unlock;
19739+
wsrep_thd_UNLOCK(thd);
19740+
19741+
break;
1973019742
}
1973119743
default:
1973219744
WSREP_WARN("bad wsrep query state: %d",
1973319745
wsrep_thd_query_state(thd));
19734-
goto ret_unlock;
19746+
wsrep_thd_UNLOCK(thd);
19747+
break;
1973519748
}
1973619749

19737-
ret_awake:
19738-
awake= true;
19739-
19740-
ret_unlock:
19741-
trx_mutex_exit(victim_trx);
19742-
lock_mutex_exit();
19743-
if (awake)
19744-
wsrep_thd_awake(thd, arg->signal);
19745-
wsrep_thd_UNLOCK(thd);
19746-
19747-
ret:
19748-
free(arg);
19749-
DBUG_VOID_RETURN;
19750-
19751-
}
19752-
19753-
/*******************************************************************//**
19754-
This function is used to kill one transaction in BF. */
19755-
UNIV_INTERN
19756-
void
19757-
wsrep_innobase_kill_one_trx(
19758-
/*========================*/
19759-
MYSQL_THD const bf_thd,
19760-
const trx_t * const bf_trx,
19761-
trx_t *victim_trx,
19762-
ibool signal)
19763-
{
19764-
ut_ad(bf_thd);
19765-
ut_ad(victim_trx);
19766-
ut_ad(lock_mutex_own());
19767-
ut_ad(trx_mutex_own(victim_trx));
19768-
19769-
bg_wsrep_kill_trx_arg *arg = (bg_wsrep_kill_trx_arg*)malloc(sizeof(*arg));
19770-
arg->thd_id = thd_get_thread_id(victim_trx->mysql_thd);
19771-
arg->trx_id = victim_trx->id;
19772-
arg->bf_seqno = wsrep_thd_trx_seqno((THD*)bf_thd);
19773-
arg->signal = signal;
19774-
19775-
DBUG_ENTER("wsrep_innobase_kill_one_trx");
19776-
19777-
WSREP_LOG_CONFLICT(bf_thd, victim_trx->mysql_thd, TRUE);
19778-
19779-
DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
19780-
{
19781-
const char act[]=
19782-
"now "
19783-
"wait_for signal.wsrep_after_BF_victim_lock";
19784-
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
19785-
STRING_WITH_LEN(act)));
19786-
};);
19787-
19788-
19789-
mysql_manager_submit(bg_wsrep_kill_trx, arg);
1979019750
DBUG_VOID_RETURN;
1979119751
}
1979219752

@@ -19821,8 +19781,8 @@ wsrep_abort_transaction(
1982119781
WSREP_DEBUG("victim does not have transaction");
1982219782
wsrep_thd_LOCK(victim_thd);
1982319783
wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
19824-
wsrep_thd_awake(victim_thd, signal);
1982519784
wsrep_thd_UNLOCK(victim_thd);
19785+
wsrep_thd_awake(victim_thd, signal);
1982619786
}
1982719787

1982819788
DBUG_VOID_RETURN;

0 commit comments

Comments
 (0)