Skip to content

Commit 1af6e92

Browse files
committed
MDEV-22666 galera.MW-328A hang
The hang can happen between a lock connection issuing KILL CONNECTION for a victim, which is in the committing phase. A two-resource deadlock occurs, where the killer is holding the victim's LOCK_thd_data and requires the trx mutex of the victim. The victim, otoh, holds its own trx mutex, but requires LOCK_thd_data in wsrep_commit_ordered(). Hence a classic two-thread deadlock happens. The fix in this commit changes the innodb commit so that wsrep_commit_ordered() is not called while holding the trx mutex. With this, the wsrep patch's commit-time mutex locking does not violate the locking protocol of the KILL command (i.e. LOCK_thd_data -> trx mutex). Also, a new test case has been added in galera.galera_bf_kill.test for the scenario where a client connection is killed in the committing phase.
1 parent dc22acf commit 1af6e92

File tree

4 files changed

+67
-6
lines changed

4 files changed

+67
-6
lines changed

mysql-test/suite/galera/r/galera_bf_kill.result

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,20 @@ a b
7070
2 1
7171
disconnect node_2a;
7272
drop table t1;
73+
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
74+
connection node_2a;
75+
CREATE TABLE t1 (i int primary key);
76+
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
77+
INSERT INTO t1 VALUES (1);
78+
connection node_2;
79+
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
80+
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
81+
SET DEBUG_SYNC='RESET';
82+
connection node_2a;
83+
connection node_2;
84+
select * from t1;
85+
i
86+
1
87+
disconnect node_2a;
88+
connection node_2;
89+
drop table t1;

mysql-test/suite/galera/t/galera_bf_kill.test

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,4 +140,48 @@ select * from t1;
140140
drop table t1;
141141

142142

143+
#
144+
# Test case 7:
145+
# run a transaction in node 2, and set a sync point to pause the transaction
146+
# in commit phase.
147+
# Through another connection to node 2, kill the committing transaction by
148+
# KILL QUERY command
149+
#
150+
151+
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
152+
--connection node_2a
153+
--let $connection_id = `SELECT CONNECTION_ID()`
154+
155+
CREATE TABLE t1 (i int primary key);
156+
157+
# Set up sync point
158+
SET DEBUG_SYNC = "before_wsrep_ordered_commit SIGNAL bwoc_reached WAIT_FOR bwoc_continue";
159+
160+
# Send insert which will block in the sync point above
161+
--send INSERT INTO t1 VALUES (1)
162+
163+
--connection node_2
164+
SET DEBUG_SYNC = "now WAIT_FOR bwoc_reached";
165+
166+
--disable_query_log
167+
--disable_result_log
168+
# victim has passed the point of no return, kill is not possible anymore
169+
--eval KILL QUERY $connection_id
170+
--enable_result_log
171+
--enable_query_log
172+
173+
SET DEBUG_SYNC = "now SIGNAL bwoc_continue";
174+
SET DEBUG_SYNC='RESET';
175+
--connection node_2a
176+
--error 0,1213
177+
--reap
178+
179+
--connection node_2
180+
# victim was able to complete the INSERT
181+
select * from t1;
182+
183+
--disconnect node_2a
184+
185+
--connection node_2
186+
drop table t1;
143187

sql/service_wsrep.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,7 @@ extern "C" void wsrep_commit_ordered(THD *thd)
299299
thd->wsrep_trx().state() == wsrep::transaction::s_committing &&
300300
!wsrep_commit_will_write_binlog(thd))
301301
{
302+
DEBUG_SYNC(thd, "before_wsrep_ordered_commit");
302303
thd->wsrep_cs().ordered_commit();
303304
}
304305
}

storage/innobase/trx/trx0trx.cc

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,12 +1493,6 @@ inline void trx_t::commit_in_memory(const mtr_t *mtr)
14931493
if (fts_trx)
14941494
trx_finalize_for_fts(this, undo_no != 0);
14951495

1496-
trx_mutex_enter(this);
1497-
dict_operation= TRX_DICT_OP_NONE;
1498-
1499-
DBUG_LOG("trx", "Commit in memory: " << this);
1500-
state= TRX_STATE_NOT_STARTED;
1501-
15021496
#ifdef WITH_WSREP
15031497
/* Serialization history has been written and the transaction is
15041498
committed in memory, which makes this commit ordered. Release commit
@@ -1510,6 +1504,11 @@ inline void trx_t::commit_in_memory(const mtr_t *mtr)
15101504
}
15111505
lock.was_chosen_as_wsrep_victim= false;
15121506
#endif /* WITH_WSREP */
1507+
trx_mutex_enter(this);
1508+
dict_operation= TRX_DICT_OP_NONE;
1509+
1510+
DBUG_LOG("trx", "Commit in memory: " << this);
1511+
state= TRX_STATE_NOT_STARTED;
15131512

15141513
assert_freed();
15151514
trx_init(this);

0 commit comments

Comments
 (0)