Skip to content

Commit 775fcce

Browse files
author
Jan Lindström
committed
MDEV-23536 : Race condition between KILL and transaction commit
A race condition may occur between the execution of transaction commit, and an execution of a KILL statement that would attempt to abort that transaction. MDEV-17092 worked around this race condition by modifying InnoDB code. After that issue was closed, Sergey Vojtovich pointed out that this race condition would better be fixed above the storage engine layer: If you look carefully into the above, you can conclude that thd->free_connection() can be called concurrently with KILL/thd->awake(). Which is the bug. And it is partially fixed in THD::~THD(), that is destructor waits for KILL completion: Fix: Add necessary mutex operations to THD::free_connection() and move WSREP specific code also there. This ensures that no one is using THD while we do free_connection(). These mutexes will also ensures that there can't be concurrent KILL/THD::awake(). innobase_kill_query We can now remove usage of trx_sys_mutex introduced on MDEV-17092. trx_t::free() Poison trx->state and trx->mysql_thd This patch is validated with an RQG run similar to the one that reproduced MDEV-17092.
1 parent 18254c1 commit 775fcce

File tree

5 files changed

+41
-39
lines changed

5 files changed

+41
-39
lines changed

sql/sql_class.cc

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1357,12 +1357,15 @@ void THD::change_user(void)
13571357

13581358
/* Do operations that may take a long time */
13591359

1360-
void THD::cleanup(void)
1360+
void THD::cleanup(bool have_mutex)
13611361
{
13621362
DBUG_ENTER("THD::cleanup");
13631363
DBUG_ASSERT(cleanup_done == 0);
13641364

1365-
set_killed(KILL_CONNECTION);
1365+
if (have_mutex)
1366+
set_killed_no_mutex(KILL_CONNECTION,0,0);
1367+
else
1368+
set_killed(KILL_CONNECTION);
13661369
#ifdef ENABLE_WHEN_BINLOG_WILL_BE_ABLE_TO_PREPARE
13671370
if (transaction.xid_state.xa_state == XA_PREPARED)
13681371
{
@@ -1437,6 +1440,28 @@ void THD::cleanup(void)
14371440
void THD::free_connection()
14381441
{
14391442
DBUG_ASSERT(free_connection_done == 0);
1443+
/* Check that we have already called thd->unlink() */
1444+
DBUG_ASSERT(prev == 0 && next == 0);
1445+
1446+
/*
1447+
Other threads may have a lock on THD::LOCK_thd_data or
1448+
THD::LOCK_thd_kill to ensure that this THD is not deleted
1449+
while they access it. The following mutex_lock ensures
1450+
that no one else is using this THD and it's now safe to
1451+
continue.
1452+
1453+
For example consider KILL-statement execution on
1454+
sql_parse.cc kill_one_thread() that will use
1455+
THD::LOCK_thd_data to protect victim thread during
1456+
THD::awake().
1457+
*/
1458+
mysql_mutex_lock(&LOCK_thd_data);
1459+
mysql_mutex_lock(&LOCK_thd_kill);
1460+
1461+
#ifdef WITH_WSREP
1462+
delete wsrep_rgi;
1463+
wsrep_rgi= 0;
1464+
#endif /* WITH_WSREP */
14401465
my_free(db);
14411466
db= NULL;
14421467
#ifndef EMBEDDED_LIBRARY
@@ -1445,8 +1470,8 @@ void THD::free_connection()
14451470
net.vio= 0;
14461471
net_end(&net);
14471472
#endif
1448-
if (!cleanup_done)
1449-
cleanup();
1473+
if (!cleanup_done)
1474+
cleanup(true); // We have locked THD::LOCK_thd_kill
14501475
ha_close_connection(this);
14511476
plugin_thdvar_cleanup(this);
14521477
mysql_audit_free_thd(this);
@@ -1457,6 +1482,8 @@ void THD::free_connection()
14571482
#if defined(ENABLED_PROFILING)
14581483
profiling.restart(); // Reset profiling
14591484
#endif
1485+
mysql_mutex_unlock(&LOCK_thd_kill);
1486+
mysql_mutex_unlock(&LOCK_thd_data);
14601487
}
14611488

14621489
/*
@@ -1512,9 +1539,6 @@ THD::~THD()
15121539
mysql_mutex_lock(&LOCK_thd_data);
15131540
mysql_mutex_unlock(&LOCK_thd_data);
15141541

1515-
#ifdef WITH_WSREP
1516-
delete wsrep_rgi;
1517-
#endif
15181542
if (!free_connection_done)
15191543
free_connection();
15201544

sql/sql_class.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3194,7 +3194,7 @@ class THD :public Statement,
31943194
void update_all_stats();
31953195
void update_stats(void);
31963196
void change_user(void);
3197-
void cleanup(void);
3197+
void cleanup(bool have_mutex=false);
31983198
void cleanup_after_query();
31993199
void free_connection();
32003200
void reset_for_reuse();

storage/innobase/handler/ha_innodb.cc

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5176,38 +5176,15 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels)
51765176

51775177
if (trx_t* trx= thd_to_trx(thd))
51785178
{
5179+
ut_ad(trx->mysql_thd == thd);
51795180
lock_mutex_enter();
5180-
trx_sys_mutex_enter();
5181-
trx_mutex_enter(trx);
5182-
/* It is possible that innobase_close_connection() is concurrently
5183-
being executed on our victim. In that case, trx->mysql_thd would
5184-
be reset before invoking trx_t::free(). Even if the trx object is later
5185-
reused for another client connection or a background transaction,
5186-
its trx->mysql_thd will differ from our thd.
5187-
5188-
If trx never performed any changes, nothing is really protecting
5189-
the trx_t::free() call or the changes of trx_t::state when the
5190-
transaction is being rolled back and trx_commit_low() is being
5191-
executed.
5192-
5193-
The function trx_allocate_for_mysql() acquires
5194-
trx_sys_t::mutex, but trx_allocate_for_background() will not.
5195-
Luckily, background transactions cannot be read-only, because
5196-
for read-only transactions, trx_start_low() will avoid acquiring
5197-
any of the trx_sys_t::mutex, lock_sys_t::mutex, trx_t::mutex before
5198-
assigning trx_t::state.
5199-
5200-
At this point, trx may have been reallocated for another client
5201-
connection, or for a background operation. In that case, either
5202-
trx_t::state or trx_t::mysql_thd should not match our expectations. */
5203-
bool cancel= trx->mysql_thd == thd && trx->state == TRX_STATE_ACTIVE &&
5204-
!trx->lock.was_chosen_as_deadlock_victim;
5205-
trx_sys_mutex_exit();
5206-
if (!cancel);
5207-
else if (lock_t *lock= trx->lock.wait_lock)
5181+
if (lock_t *lock= trx->lock.wait_lock)
5182+
{
5183+
trx_mutex_enter(trx);
52085184
lock_cancel_waiting_and_release(lock);
5185+
trx_mutex_exit(trx);
5186+
}
52095187
lock_mutex_exit();
5210-
trx_mutex_exit(trx);
52115188
}
52125189

52135190
DBUG_VOID_RETURN;

storage/innobase/lock/lock0lock.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6453,6 +6453,7 @@ lock_cancel_waiting_and_release(
64536453

64546454
ut_ad(lock_mutex_own());
64556455
ut_ad(trx_mutex_own(lock->trx));
6456+
ut_ad(lock->trx->state == TRX_STATE_ACTIVE);
64566457

64576458
lock->trx->lock.cancel = true;
64586459

storage/innobase/trx/trx0trx.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ inline void trx_t::free()
393393
/* do not poison mutex */
394394
MEM_NOACCESS(&id, sizeof id);
395395
MEM_NOACCESS(&no, sizeof no);
396-
/* state is accessed by innobase_kill_connection() */
396+
MEM_NOACCESS(&state, sizeof state);
397397
MEM_NOACCESS(&is_recovered, sizeof is_recovered);
398398
#ifdef WITH_WSREP
399399
MEM_NOACCESS(&wsrep, sizeof wsrep);
@@ -419,7 +419,7 @@ inline void trx_t::free()
419419
MEM_NOACCESS(&start_time_micro, sizeof start_time_micro);
420420
MEM_NOACCESS(&commit_lsn, sizeof commit_lsn);
421421
MEM_NOACCESS(&table_id, sizeof table_id);
422-
/* mysql_thd is accessed by innobase_kill_connection() */
422+
MEM_NOACCESS(&mysql_thd, sizeof mysql_thd);
423423
MEM_NOACCESS(&mysql_log_file_name, sizeof mysql_log_file_name);
424424
MEM_NOACCESS(&mysql_log_offset, sizeof mysql_log_offset);
425425
MEM_NOACCESS(&n_mysql_tables_in_use, sizeof n_mysql_tables_in_use);

0 commit comments

Comments
 (0)