Skip to content

Commit

Permalink
MDEV-23536 : Race condition between KILL and transaction commit
Browse files Browse the repository at this point in the history
A race condition may occur between the execution of transaction commit,
and an execution of a KILL statement that would attempt to abort that
transaction.

MDEV-17092 worked around this race condition by modifying InnoDB code.
After that issue was closed, Sergey Vojtovich pointed out that this
race condition would better be fixed above the storage engine layer:

If you look carefully into the above, you can conclude that
thd->free_connection() can be called concurrently with
KILL/thd->awake(). Which is the bug. And it is partially fixed in
THD::~THD(), that is destructor waits for KILL completion:

Fix: Add necessary mutex operations to THD::free_connection()
and move WSREP specific code also there. This ensures that no
one is using THD while we do free_connection(). These mutexes
will also ensures that there can't be concurrent KILL/THD::awake().

innobase_kill_query
  We can now remove usage of trx_sys_mutex introduced on MDEV-17092.

trx_t::free()
  Poison trx->state and trx->mysql_thd

This patch is validated with an RQG run similar to the one that
reproduced MDEV-17092.
  • Loading branch information
Jan Lindström committed Jan 8, 2021
1 parent 18254c1 commit 775fcce
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 39 deletions.
38 changes: 31 additions & 7 deletions sql/sql_class.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1357,12 +1357,15 @@ void THD::change_user(void)

/* Do operations that may take a long time */

void THD::cleanup(void)
void THD::cleanup(bool have_mutex)
{
DBUG_ENTER("THD::cleanup");
DBUG_ASSERT(cleanup_done == 0);

set_killed(KILL_CONNECTION);
if (have_mutex)
set_killed_no_mutex(KILL_CONNECTION,0,0);
else
set_killed(KILL_CONNECTION);
#ifdef ENABLE_WHEN_BINLOG_WILL_BE_ABLE_TO_PREPARE
if (transaction.xid_state.xa_state == XA_PREPARED)
{
Expand Down Expand Up @@ -1437,6 +1440,28 @@ void THD::cleanup(void)
void THD::free_connection()
{
DBUG_ASSERT(free_connection_done == 0);
/* Check that we have already called thd->unlink() */
DBUG_ASSERT(prev == 0 && next == 0);

/*
Other threads may have a lock on THD::LOCK_thd_data or
THD::LOCK_thd_kill to ensure that this THD is not deleted
while they access it. The following mutex_lock ensures
that no one else is using this THD and it's now safe to
continue.
For example consider KILL-statement execution on
sql_parse.cc kill_one_thread() that will use
THD::LOCK_thd_data to protect victim thread during
THD::awake().
*/
mysql_mutex_lock(&LOCK_thd_data);
mysql_mutex_lock(&LOCK_thd_kill);

#ifdef WITH_WSREP
delete wsrep_rgi;
wsrep_rgi= 0;
#endif /* WITH_WSREP */
my_free(db);
db= NULL;
#ifndef EMBEDDED_LIBRARY
Expand All @@ -1445,8 +1470,8 @@ void THD::free_connection()
net.vio= 0;
net_end(&net);
#endif
if (!cleanup_done)
cleanup();
if (!cleanup_done)
cleanup(true); // We have locked THD::LOCK_thd_kill
ha_close_connection(this);
plugin_thdvar_cleanup(this);
mysql_audit_free_thd(this);
Expand All @@ -1457,6 +1482,8 @@ void THD::free_connection()
#if defined(ENABLED_PROFILING)
profiling.restart(); // Reset profiling
#endif
mysql_mutex_unlock(&LOCK_thd_kill);
mysql_mutex_unlock(&LOCK_thd_data);
}

/*
Expand Down Expand Up @@ -1512,9 +1539,6 @@ THD::~THD()
mysql_mutex_lock(&LOCK_thd_data);
mysql_mutex_unlock(&LOCK_thd_data);

#ifdef WITH_WSREP
delete wsrep_rgi;
#endif
if (!free_connection_done)
free_connection();

Expand Down
2 changes: 1 addition & 1 deletion sql/sql_class.h
Original file line number Diff line number Diff line change
Expand Up @@ -3194,7 +3194,7 @@ class THD :public Statement,
void update_all_stats();
void update_stats(void);
void change_user(void);
void cleanup(void);
void cleanup(bool have_mutex=false);
void cleanup_after_query();
void free_connection();
void reset_for_reuse();
Expand Down
35 changes: 6 additions & 29 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5176,38 +5176,15 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels)

if (trx_t* trx= thd_to_trx(thd))
{
ut_ad(trx->mysql_thd == thd);
lock_mutex_enter();
trx_sys_mutex_enter();
trx_mutex_enter(trx);
/* It is possible that innobase_close_connection() is concurrently
being executed on our victim. In that case, trx->mysql_thd would
be reset before invoking trx_t::free(). Even if the trx object is later
reused for another client connection or a background transaction,
its trx->mysql_thd will differ from our thd.

If trx never performed any changes, nothing is really protecting
the trx_t::free() call or the changes of trx_t::state when the
transaction is being rolled back and trx_commit_low() is being
executed.

The function trx_allocate_for_mysql() acquires
trx_sys_t::mutex, but trx_allocate_for_background() will not.
Luckily, background transactions cannot be read-only, because
for read-only transactions, trx_start_low() will avoid acquiring
any of the trx_sys_t::mutex, lock_sys_t::mutex, trx_t::mutex before
assigning trx_t::state.

At this point, trx may have been reallocated for another client
connection, or for a background operation. In that case, either
trx_t::state or trx_t::mysql_thd should not match our expectations. */
bool cancel= trx->mysql_thd == thd && trx->state == TRX_STATE_ACTIVE &&
!trx->lock.was_chosen_as_deadlock_victim;
trx_sys_mutex_exit();
if (!cancel);
else if (lock_t *lock= trx->lock.wait_lock)
if (lock_t *lock= trx->lock.wait_lock)
{
trx_mutex_enter(trx);
lock_cancel_waiting_and_release(lock);
trx_mutex_exit(trx);
}
lock_mutex_exit();
trx_mutex_exit(trx);
}

DBUG_VOID_RETURN;
Expand Down
1 change: 1 addition & 0 deletions storage/innobase/lock/lock0lock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6453,6 +6453,7 @@ lock_cancel_waiting_and_release(

ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(lock->trx));
ut_ad(lock->trx->state == TRX_STATE_ACTIVE);

lock->trx->lock.cancel = true;

Expand Down
4 changes: 2 additions & 2 deletions storage/innobase/trx/trx0trx.cc
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ inline void trx_t::free()
/* do not poison mutex */
MEM_NOACCESS(&id, sizeof id);
MEM_NOACCESS(&no, sizeof no);
/* state is accessed by innobase_kill_connection() */
MEM_NOACCESS(&state, sizeof state);
MEM_NOACCESS(&is_recovered, sizeof is_recovered);
#ifdef WITH_WSREP
MEM_NOACCESS(&wsrep, sizeof wsrep);
Expand All @@ -419,7 +419,7 @@ inline void trx_t::free()
MEM_NOACCESS(&start_time_micro, sizeof start_time_micro);
MEM_NOACCESS(&commit_lsn, sizeof commit_lsn);
MEM_NOACCESS(&table_id, sizeof table_id);
/* mysql_thd is accessed by innobase_kill_connection() */
MEM_NOACCESS(&mysql_thd, sizeof mysql_thd);
MEM_NOACCESS(&mysql_log_file_name, sizeof mysql_log_file_name);
MEM_NOACCESS(&mysql_log_offset, sizeof mysql_log_offset);
MEM_NOACCESS(&n_mysql_tables_in_use, sizeof n_mysql_tables_in_use);
Expand Down

0 comments on commit 775fcce

Please sign in to comment.