Skip to content

Commit 3f59bbe

Browse files
temeosjaakolajanlindstrom
authored andcommitted
MDEV-29293 MariaDB stuck on starting commit state
The problem seems to be a deadlock between KILL command execution and BF abort issued by an applier, where: * KILL has locked victim's LOCK_thd_kill and LOCK_thd_data. * Applier has innodb side global lock mutex and victim trx mutex. * KILL is calling innobase_kill_query, and is blocked by innodb global lock mutex. * Applier is in wsrep_innobase_kill_one_trx and is blocked by victim's LOCK_thd_kill. The fix in this commit removes the TOI replication of KILL command and makes KILL execution less intrusive operation. Aborting the victim happens now by using awake_no_mutex() and ha_abort_transaction(). If the KILL happens when the transaction is committing, the KILL operation is postponed to happen after the statement has completed in order to avoid KILL to interrupt commit processing. Notable changes in this commit: * wsrep client connections's error state may remain sticky after client connection is closed. This error message will then pop up for the next client session issuing first SQL statement. This problem raised with test galera.galera_bf_kill. The fix is to reset wsrep client error state, before a THD is reused for next connetion. * Release THD locks in wsrep_abort_transaction when locking innodb mutexes. This guarantees same locking order as with applier BF aborting. * BF abort from MDL was changed to do BF abort on server/wsrep-lib side first, and only then do the BF abort on InnoDB side. This removes the need to call back from InnoDB for BF aborts which originate from MDL and simplifies the locking. * Removed wsrep_thd_set_wsrep_aborter() from service_wsrep.h. The manipulation of the wsrep_aborter can be done solely on server side. Moreover, it is now debug only variable and could be excluded from optimized builds. * Remove LOCK_thd_kill from wsrep_thd_LOCK/UNLOCK to allow more fine grained locking for SR BF abort which may require locking of victim LOCK_thd_kill. Added explicit call for wsrep_thd_kill_LOCK/UNLOCK where appropriate. * Wsrep-lib was updated to version which allows external locking for BF abort calls. Changes to MTR tests: * Disable galera_bf_abort_group_commit. This test is going to be removed (MDEV-30855). * Record galera_gcache_recover_manytrx as result file was incomplete. Trivial change. * Make galera_create_table_as_select more deterministic: Wait until CTAS execution has reached MDL wait for multi-master conflict case. Expected error from multi-master conflict is ER_QUERY_INTERRUPTED. This is because CTAS does not yet have open wsrep transaction when it is waiting for MDL, query gets interrupted instead of BF aborted. This should be addressed in separate task. * A new test galera_kill_group_commit to verify correct behavior when KILL is executed while the transaction is committing. Co-authored-by: Seppo Jaakola <seppo.jaakola@iki.fi> Co-authored-by: Jan Lindström <jan.lindstrom@galeracluster.com> Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
1 parent 03d4fd3 commit 3f59bbe

26 files changed

+574
-245
lines changed

include/mysql/service_wsrep.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ extern struct wsrep_service_st {
5757
my_bool (*wsrep_on_func)(const MYSQL_THD thd);
5858
bool (*wsrep_prepare_key_for_innodb_func)(MYSQL_THD thd, const unsigned char*, size_t, const unsigned char*, size_t, struct wsrep_buf*, size_t*);
5959
void (*wsrep_thd_LOCK_func)(const MYSQL_THD thd);
60+
int (*wsrep_thd_TRYLOCK_func)(const MYSQL_THD thd);
6061
void (*wsrep_thd_UNLOCK_func)(const MYSQL_THD thd);
6162
const char * (*wsrep_thd_query_func)(const MYSQL_THD thd);
6263
int (*wsrep_thd_retry_counter_func)(const MYSQL_THD thd);
@@ -86,7 +87,6 @@ extern struct wsrep_service_st {
8687
ulong (*wsrep_OSU_method_get_func)(const MYSQL_THD thd);
8788
my_bool (*wsrep_thd_has_ignored_error_func)(const MYSQL_THD thd);
8889
void (*wsrep_thd_set_ignored_error_func)(MYSQL_THD thd, my_bool val);
89-
bool (*wsrep_thd_set_wsrep_aborter_func)(MYSQL_THD bf_thd, MYSQL_THD thd);
9090
void (*wsrep_report_bf_lock_wait_func)(const MYSQL_THD thd,
9191
unsigned long long trx_id);
9292
void (*wsrep_thd_kill_LOCK_func)(const MYSQL_THD thd);
@@ -108,6 +108,7 @@ extern struct wsrep_service_st {
108108
#define wsrep_on(thd) (thd) && WSREP_ON && wsrep_service->wsrep_on_func(thd)
109109
#define wsrep_prepare_key_for_innodb(A,B,C,D,E,F,G) wsrep_service->wsrep_prepare_key_for_innodb_func(A,B,C,D,E,F,G)
110110
#define wsrep_thd_LOCK(T) wsrep_service->wsrep_thd_LOCK_func(T)
111+
#define wsrep_thd_TRYLOCK(T) wsrep_service->wsrep_thd_TRYLOCK_func(T)
111112
#define wsrep_thd_UNLOCK(T) wsrep_service->wsrep_thd_UNLOCK_func(T)
112113
#define wsrep_thd_kill_LOCK(T) wsrep_service->wsrep_thd_kill_LOCK_func(T)
113114
#define wsrep_thd_kill_UNLOCK(T) wsrep_service->wsrep_thd_kill_UNLOCK_func(T)
@@ -136,7 +137,6 @@ extern struct wsrep_service_st {
136137
#define wsrep_OSU_method_get(T) wsrep_service->wsrep_OSU_method_get_func(T)
137138
#define wsrep_thd_has_ignored_error(T) wsrep_service->wsrep_thd_has_ignored_error_func(T)
138139
#define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V)
139-
#define wsrep_thd_set_wsrep_aborter(T) wsrep_service->wsrep_thd_set_wsrep_aborter_func(T1, T2)
140140
#define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I)
141141
#define wsrep_thd_set_PA_unsafe(T) wsrep_service->wsrep_thd_set_PA_unsafe_func(T)
142142
#else
@@ -170,6 +170,8 @@ void wsrep_set_data_home_dir(const char *data_dir);
170170
extern "C" my_bool wsrep_on(const MYSQL_THD thd);
171171
/* Lock thd wsrep lock */
172172
extern "C" void wsrep_thd_LOCK(const MYSQL_THD thd);
173+
/* Try thd wsrep lock. Return non-zero if lock could not be taken. */
174+
extern "C" int wsrep_thd_TRYLOCK(const MYSQL_THD thd);
173175
/* Unlock thd wsrep lock */
174176
extern "C" void wsrep_thd_UNLOCK(const MYSQL_THD thd);
175177

@@ -192,8 +194,6 @@ extern "C" my_bool wsrep_thd_is_local(const MYSQL_THD thd);
192194
/* Return true if thd is in high priority mode */
193195
/* todo: rename to is_high_priority() */
194196
extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
195-
/* set wsrep_aborter for the target THD */
196-
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);
197197
/* Return true if thd is in TOI mode */
198198
extern "C" my_bool wsrep_thd_is_toi(const MYSQL_THD thd);
199199
/* Return true if thd is in replicating TOI mode */
@@ -237,7 +237,6 @@ extern "C" my_bool wsrep_thd_is_applying(const MYSQL_THD thd);
237237
extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd);
238238
extern "C" my_bool wsrep_thd_has_ignored_error(const MYSQL_THD thd);
239239
extern "C" void wsrep_thd_set_ignored_error(MYSQL_THD thd, my_bool val);
240-
extern "C" bool wsrep_thd_set_wsrep_aborter(MYSQL_THD bf_thd, MYSQL_THD victim_thd);
241240
extern "C" void wsrep_report_bf_lock_wait(const THD *thd,
242241
unsigned long long trx_id);
243242
/* declare parallel applying unsafety for the THD */

mysql-test/suite/galera/disabled.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ MDEV-26575 : MDEV-29878 Galera test failure on MDEV-26575
2222
galera_bf_abort_shutdown : MDEV-29918 Assertion failure on galera_bf_abort_shutdown
2323
galera_wan : [ERROR] WSREP: /home/buildbot/buildbot/build/gcs/src/gcs_state_msg.cpp:gcs_state_msg_get_quorum():947: Failed to establish quorum.
2424
galera_var_ignore_apply_errors : 28: "Server did not transition to READY state"
25+
MDEV-27713 : test is using get_lock(), which is now rejected in cluster
26+
galera_bf_abort_group_commit : MDEV-30855 PR to remove the test exists
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
connection node_2;
2+
connection node_1;
3+
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
4+
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
5+
set wsrep_sync_wait = 0;
6+
CREATE TABLE t1(a int not null primary key auto_increment, b int) engine=InnoDB;
7+
INSERT INTO t1 VALUES (1,2);
8+
connection node_1a;
9+
BEGIN;
10+
UPDATE t1 SET b=3 WHERE a=1;
11+
connection node_1;
12+
set debug_sync='wsrep_kill_before_awake_no_mutex SIGNAL before_kill WAIT_FOR continue';
13+
connection node_1b;
14+
set debug_sync= 'now WAIT_FOR before_kill';
15+
connection node_2;
16+
UPDATE t1 SET b=7 WHERE a=1;
17+
connection node_1b;
18+
set debug_sync= 'now SIGNAL continue';
19+
connection node_1;
20+
DROP TABLE t1;
21+
SET DEBUG_SYNC= 'RESET';

mysql-test/suite/galera/r/galera_create_table_as_select.result

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
8282
LOCK TABLE t2 WRITE;
8383
connection node_1;
8484
CREATE TABLE t1 AS SELECT * FROM t2;;
85+
connection node_1a;
8586
connection node_2;
8687
SELECT COUNT(*) = 5 FROM t2;
8788
COUNT(*) = 5

mysql-test/suite/galera/r/galera_gcache_recover_manytrx.result

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,3 @@ connection node_1;
134134
call mtr.add_suppression("Error in Log_event::read_log_event():.*");
135135
CALL mtr.add_suppression("conflict state 7 after post commit");
136136
CALL mtr.add_suppression("Skipped GCache ring buffer recovery");
137-
connection node_2;
138-
call mtr.add_suppression("Error in Log_event::read_log_event():.*");
139-
CALL mtr.add_suppression("Skipped GCache ring buffer recovery");
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
connection node_2;
2+
connection node_1;
3+
connect node_1_kill, 127.0.0.1, root, , test, $NODE_MYPORT_1;
4+
connect node_1_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1;
5+
SET SESSION wsrep_sync_wait = 0;
6+
connect node_1_follower, 127.0.0.1, root, , test, $NODE_MYPORT_1;
7+
SET SESSION wsrep_sync_wait = 0;
8+
connection node_1;
9+
CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB;
10+
SET SESSION DEBUG_SYNC = "commit_before_enqueue SIGNAL leader_before_enqueue_reached WAIT_FOR leader_before_enqueue_continue";
11+
INSERT INTO t1 VALUES (1);
12+
connection node_1_ctrl;
13+
SET DEBUG_SYNC = "now WAIT_FOR leader_before_enqueue_reached";
14+
connection node_1_follower;
15+
INSERT INTO t1 VALUES (2);;
16+
connection node_1_ctrl;
17+
connection node_1_kill;
18+
# Execute KILL QUERY for group commit follower
19+
SET DEBUG_SYNC = "now SIGNAL leader_before_enqueue_continue";
20+
connection node_1_follower;
21+
connection node_1;
22+
SELECT * FROM t1;
23+
f1
24+
1
25+
2
26+
SET DEBUG_SYNC = "RESET";
27+
DROP TABLE t1;
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
--source include/galera_cluster.inc
2+
--source include/have_innodb.inc
3+
--source include/have_debug_sync.inc
4+
--source include/galera_have_debug_sync.inc
5+
6+
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
7+
--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
8+
set wsrep_sync_wait = 0;
9+
10+
CREATE TABLE t1(a int not null primary key auto_increment, b int) engine=InnoDB;
11+
INSERT INTO t1 VALUES (1,2);
12+
13+
--connection node_1a
14+
--let $victim_id = `SELECT CONNECTION_ID()`
15+
BEGIN;
16+
UPDATE t1 SET b=3 WHERE a=1;
17+
18+
--connection node_1
19+
set debug_sync='wsrep_kill_before_awake_no_mutex SIGNAL before_kill WAIT_FOR continue';
20+
--disable_query_log
21+
--disable_result_log
22+
--send_eval KILL CONNECTION $victim_id
23+
--enable_result_log
24+
--enable_query_log
25+
26+
--connection node_1b
27+
set debug_sync= 'now WAIT_FOR before_kill';
28+
29+
--connection node_2
30+
UPDATE t1 SET b=7 WHERE a=1;
31+
32+
--connection node_1b
33+
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST WHERE User = 'system user' AND State LIKE 'Update_rows_log_event%';
34+
--source include/wait_condition.inc
35+
set debug_sync= 'now SIGNAL continue';
36+
37+
--connection node_1
38+
--reap
39+
DROP TABLE t1;
40+
SET DEBUG_SYNC= 'RESET';
41+

mysql-test/suite/galera/t/galera_create_table_as_select.test

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ LOCK TABLE t2 WRITE;
113113
--connection node_1
114114
--send CREATE TABLE t1 AS SELECT * FROM t2;
115115

116+
--connection node_1a
117+
--let $wait_condition = SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE STATE LIKE 'Waiting for table metadata lock%'
118+
--source include/wait_condition.inc
119+
116120
--connection node_2
117121
SELECT COUNT(*) = 5 FROM t2;
118122
CREATE TABLE t1 AS SELECT * FROM t2;
@@ -121,7 +125,7 @@ CREATE TABLE t1 AS SELECT * FROM t2;
121125
UNLOCK TABLES;
122126

123127
--connection node_1
124-
--error ER_TABLE_EXISTS_ERROR,ER_LOCK_DEADLOCK
128+
--error ER_TABLE_EXISTS_ERROR,ER_QUERY_INTERRUPTED
125129
--reap
126130

127131
DROP TABLE t1, t2;
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
!include ../galera_2nodes.cnf
2+
3+
[mysqld]
4+
log-bin
5+
log-slave-updates
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#
2+
# Verify that transaction which has reached group commit queue
3+
# cannot be killed. If the kill succeeds, assertion for
4+
# wsrep transaction state will fail.
5+
#
6+
# If the bug is present, i.e. wsrep transaction gets killed during
7+
# group commit wait, this test is enough to reproduce the crash
8+
# most of the time.
9+
#
10+
11+
--source include/have_innodb.inc
12+
--source include/have_debug_sync.inc
13+
--source include/galera_cluster.inc
14+
15+
# Connection for KILL commands
16+
--connect node_1_kill, 127.0.0.1, root, , test, $NODE_MYPORT_1
17+
# Connection for sync point control
18+
--connect node_1_ctrl, 127.0.0.1, root, , test, $NODE_MYPORT_1
19+
SET SESSION wsrep_sync_wait = 0;
20+
# Connection for group commit follower
21+
--connect node_1_follower, 127.0.0.1, root, , test, $NODE_MYPORT_1
22+
# Need to disable sync wait to reach commit queue when leader
23+
# is blocked.
24+
SET SESSION wsrep_sync_wait = 0;
25+
--let $follower_id = `SELECT CONNECTION_ID()`
26+
27+
--connection node_1
28+
CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB;
29+
30+
SET SESSION DEBUG_SYNC = "commit_before_enqueue SIGNAL leader_before_enqueue_reached WAIT_FOR leader_before_enqueue_continue";
31+
--send INSERT INTO t1 VALUES (1)
32+
33+
--connection node_1_ctrl
34+
SET DEBUG_SYNC = "now WAIT_FOR leader_before_enqueue_reached";
35+
36+
--connection node_1_follower
37+
# SET SESSION DEBUG_SYNC = "group_commit_waiting_for_prior SIGNAL follower_waiting_for_prior_reached WAIT_FOR follower_waiting_for_prior_continue";
38+
--send INSERT INTO t1 VALUES (2);
39+
40+
--connection node_1_ctrl
41+
# TODO: Is it possible to use sync points to enforce group commit to happen?
42+
# The leader will hold commit monitor in commit_before_enqueue sync point,
43+
# which prevents the follower to reach the group commit wait state.
44+
# We now sleep and expect the follower to reach group commit, but this
45+
# may cause false negatives.
46+
--sleep 1
47+
48+
--connection node_1_kill
49+
--echo # Execute KILL QUERY for group commit follower
50+
--disable_query_log
51+
--disable_result_log
52+
# Because it is currently impossible to verify that the
53+
# follower has reached group commit queue, the KILL may
54+
# sometimes return success.
55+
--error 0,ER_KILL_DENIED_ERROR
56+
--eval KILL QUERY $follower_id
57+
--enable_result_log
58+
--enable_query_log
59+
60+
SET DEBUG_SYNC = "now SIGNAL leader_before_enqueue_continue";
61+
--connection node_1_follower
62+
--reap
63+
64+
--connection node_1
65+
--reap
66+
SELECT * FROM t1;
67+
68+
SET DEBUG_SYNC = "RESET";
69+
DROP TABLE t1;

sql/handler.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7599,6 +7599,9 @@ Compare_keys handler::compare_key_parts(const Field &old_field,
75997599
concurrent accesses. And it's an overkill to take LOCK_plugin and
76007600
iterate the whole installed_htons[] array every time.
76017601
7602+
@note Object victim_thd is not guaranteed to exist after this
7603+
function returns.
7604+
76027605
@param bf_thd brute force THD asking for the abort
76037606
@param victim_thd victim THD to be aborted
76047607
@@ -7612,6 +7615,8 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
76127615
if (!WSREP(bf_thd) &&
76137616
!(bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU &&
76147617
wsrep_thd_is_toi(bf_thd))) {
7618+
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
7619+
mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
76157620
DBUG_RETURN(0);
76167621
}
76177622

@@ -7623,6 +7628,8 @@ int ha_abort_transaction(THD *bf_thd, THD *victim_thd, my_bool signal)
76237628
else
76247629
{
76257630
WSREP_WARN("Cannot abort InnoDB transaction");
7631+
mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
7632+
mysql_mutex_unlock(&victim_thd->LOCK_thd_kill);
76267633
}
76277634

76287635
DBUG_RETURN(0);

sql/service_wsrep.cc

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,17 @@ extern "C" my_bool wsrep_on(const THD *thd)
2929

3030
extern "C" void wsrep_thd_LOCK(const THD *thd)
3131
{
32-
mysql_mutex_lock(&thd->LOCK_thd_kill);
3332
mysql_mutex_lock(&thd->LOCK_thd_data);
3433
}
3534

35+
extern "C" int wsrep_thd_TRYLOCK(const THD *thd)
36+
{
37+
return mysql_mutex_trylock(&thd->LOCK_thd_data);
38+
}
39+
3640
extern "C" void wsrep_thd_UNLOCK(const THD *thd)
3741
{
3842
mysql_mutex_unlock(&thd->LOCK_thd_data);
39-
mysql_mutex_unlock(&thd->LOCK_thd_kill);
4043
}
4144

4245
extern "C" void wsrep_thd_kill_LOCK(const THD *thd)
@@ -248,21 +251,12 @@ extern "C" my_bool wsrep_thd_bf_abort(THD *bf_thd, THD *victim_thd,
248251

249252
if ((ret || !wsrep_on(victim_thd)) && signal)
250253
{
251-
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
252-
{
253-
WSREP_DEBUG("victim is killed already by %llu, skipping awake",
254-
victim_thd->wsrep_aborter);
255-
wsrep_thd_UNLOCK(victim_thd);
256-
return false;
257-
}
258-
259-
victim_thd->wsrep_aborter= bf_thd->thread_id;
260254
victim_thd->awake_no_mutex(KILL_QUERY_HARD);
261255
}
262256
else
263-
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu", thd_get_thread_id(victim_thd));
257+
WSREP_DEBUG("wsrep_thd_bf_abort skipped awake for %llu",
258+
thd_get_thread_id(victim_thd));
264259

265-
wsrep_thd_UNLOCK(victim_thd);
266260
return ret;
267261
}
268262

@@ -385,25 +379,6 @@ extern "C" ulong wsrep_OSU_method_get(const MYSQL_THD thd)
385379
return(global_system_variables.wsrep_OSU_method);
386380
}
387381

388-
extern "C" bool wsrep_thd_set_wsrep_aborter(THD *bf_thd, THD *victim_thd)
389-
{
390-
mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);
391-
if (!bf_thd)
392-
{
393-
victim_thd->wsrep_aborter= 0;
394-
WSREP_DEBUG("wsrep_thd_set_wsrep_aborter resetting wsrep_aborter");
395-
return false;
396-
}
397-
if (victim_thd->wsrep_aborter && victim_thd->wsrep_aborter != bf_thd->thread_id)
398-
{
399-
return true;
400-
}
401-
victim_thd->wsrep_aborter= bf_thd->thread_id;
402-
WSREP_DEBUG("wsrep_thd_set_wsrep_aborter setting wsrep_aborter %u",
403-
victim_thd->wsrep_aborter);
404-
return false;
405-
}
406-
407382
extern "C" void wsrep_report_bf_lock_wait(const THD *thd,
408383
unsigned long long trx_id)
409384
{

0 commit comments

Comments
 (0)