Skip to content

Commit

Permalink
MDEV-22666 galera.MW-328A hang
Browse files Browse the repository at this point in the history
The hang can happen between a lock connection issuing KILL CONNECTION for a victim
that is in the committing phase.
A two-resource deadlock happens, where the killer is holding the victim's
LOCK_thd_data and requires the trx mutex of the victim.
The victim, on the other hand, holds its own trx mutex, but requires LOCK_thd_data
in wsrep_commit_ordered(). Hence a classic two-thread deadlock happens.

The fix in this commit changes the InnoDB commit so that wsrep_commit_ordered()
is not called while holding the trx mutex. With this, the commit-time mutex
locking of the wsrep patch does not violate the locking protocol of the KILL command
(i.e. LOCK_thd_data -> trx mutex)

Also, a new test case has been added in galera.galera_bf_kill.test for a scenario
where a client connection is killed in the committing phase.
  • Loading branch information
sjaakola committed May 25, 2020
1 parent dc22acf commit 1af6e92
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 6 deletions.
17 changes: 17 additions & 0 deletions mysql-test/suite/galera/r/galera_bf_kill.result
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,20 @@ a b
2 1
disconnect node_2a;
drop table t1;
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
connection node_2a;
CREATE TABLE t1 (i int primary key);
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
INSERT INTO t1 VALUES (1);
connection node_2;
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
connection node_2a;
connection node_2;
select * from t1;
i
1
disconnect node_2a;
connection node_2;
drop table t1;
44 changes: 44 additions & 0 deletions mysql-test/suite/galera/t/galera_bf_kill.test
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,48 @@ select * from t1;
drop table t1;


#
# Test case 7:
# run a transaction in node 2, and set a sync point to pause the transaction
# in commit phase.
# Through another connection to node 2, kill the committing transaction by
# KILL QUERY command
#

--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--connection node_2a
--let $connection_id = `SELECT CONNECTION_ID()`

CREATE TABLE t1 (i int primary key);

# Set up sync point
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";

# Send insert which will block in the sync point above
--send INSERT INTO t1 VALUES (1)

--connection node_2
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";

--disable_query_log
--disable_result_log
# victim has passed the point of no return, kill is not possible anymore
--eval KILL QUERY $connection_id
--enable_result_log
--enable_query_log

SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
SET DEBUG_SYNC='RESET';
--connection node_2a
--error 0,1213
--reap

--connection node_2
# victim was able to complete the INSERT
select * from t1;

--disconnect node_2a

--connection node_2
drop table t1;

1 change: 1 addition & 0 deletions sql/service_wsrep.cc
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ extern "C" void wsrep_commit_ordered(THD *thd)
thd->wsrep_trx().state() == wsrep::transaction::s_committing &&
!wsrep_commit_will_write_binlog(thd))
{
DEBUG_SYNC(thd, "before_wsrep_ordered_commit");
thd->wsrep_cs().ordered_commit();
}
}
11 changes: 5 additions & 6 deletions storage/innobase/trx/trx0trx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1493,12 +1493,6 @@ inline void trx_t::commit_in_memory(const mtr_t *mtr)
if (fts_trx)
trx_finalize_for_fts(this, undo_no != 0);

trx_mutex_enter(this);
dict_operation= TRX_DICT_OP_NONE;

DBUG_LOG("trx", "Commit in memory: " << this);
state= TRX_STATE_NOT_STARTED;

#ifdef WITH_WSREP
/* Serialization history has been written and the transaction is
committed in memory, which makes this commit ordered. Release commit
Expand All @@ -1510,6 +1504,11 @@ inline void trx_t::commit_in_memory(const mtr_t *mtr)
}
lock.was_chosen_as_wsrep_victim= false;
#endif /* WITH_WSREP */
trx_mutex_enter(this);
dict_operation= TRX_DICT_OP_NONE;

DBUG_LOG("trx", "Commit in memory: " << this);
state= TRX_STATE_NOT_STARTED;

assert_freed();
trx_init(this);
Expand Down

0 comments on commit 1af6e92

Please sign in to comment.