Skip to content

Commit

Permalink
MDEV-22769 Shutdown hang or crash due to XA breaking locks
Browse files Browse the repository at this point in the history
The background drop table queue in InnoDB is a work-around for
cases where the SQL layer is requesting DDL on tables on which
transactional locks exist.

One such case is XA transactions. Our test case exploits the
fact that the recovery of XA PREPARE transactions will
only resurrect InnoDB table locks, but not the metadata locks (MDL)
that should block any concurrent DDL.

srv_shutdown_t: Introduce the srv_shutdown_state=SRV_SHUTDOWN_INITIATED
for the initial part of shutdown, to wait for the background drop
table queue to be emptied.

srv_shutdown_bg_undo_sources(): Assign
srv_shutdown_state=SRV_SHUTDOWN_INITIATED
before waiting for the background drop table queue to be emptied.

row_drop_tables_for_mysql_in_background(): On slow shutdown, if
no active transactions exist (excluding ones that are in
XA PREPARE state), skip any tables on which locks exist.

row_drop_table_for_mysql(): Do not unnecessarily attempt to
drop InnoDB persistent statistics for tables that have
already been added to the background drop table queue.

row_mysql_close(): Relax an assertion, and free all memory
even if innodb_force_recovery=2 would prevent the background
drop table queue from being emptied.
  • Loading branch information
dr-m committed Jun 5, 2020
1 parent 138c11c commit efc70da
Show file tree
Hide file tree
Showing 13 changed files with 95 additions and 51 deletions.
10 changes: 10 additions & 0 deletions mysql-test/suite/innodb/r/xa_recovery.result
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,19 @@ XA START 'x';
UPDATE t1 set a=2;
XA END 'x';
XA PREPARE 'x';
connect con2,localhost,root;
CREATE TABLE t2 (a INT) ENGINE=InnoDB;
XA START 'y';
INSERT INTO t2 VALUES (1);
XA END 'y';
XA PREPARE 'y';
connection default;
disconnect con1;
disconnect con2;
connect con1,localhost,root;
SELECT * FROM t1 LOCK IN SHARE MODE;
connection default;
DROP TABLE t2;
disconnect con1;
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SELECT * FROM t1;
Expand All @@ -20,3 +28,5 @@ SELECT * FROM t1;
a
1
DROP TABLE t1;
SET GLOBAL innodb_fast_shutdown=0;
XA ROLLBACK 'y';
13 changes: 12 additions & 1 deletion mysql-test/suite/innodb/t/xa_recovery.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@
# MDEV-8841 - close tables opened by previous tests,
# so they don't get marked crashed when the server gets crashed
--disable_query_log
call mtr.add_suppression("Found 1 prepared XA transactions");
call mtr.add_suppression("Found [12] prepared XA transactions");
FLUSH TABLES;
--enable_query_log

CREATE TABLE t1 (a INT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1);
connect (con1,localhost,root);
XA START 'x'; UPDATE t1 set a=2; XA END 'x'; XA PREPARE 'x';
connect (con2,localhost,root);
CREATE TABLE t2 (a INT) ENGINE=InnoDB;
XA START 'y'; INSERT INTO t2 VALUES (1); XA END 'y'; XA PREPARE 'y';
connection default;

# innodb_force_recovery=2 prevents the purge and tests that the fix of
Expand All @@ -25,6 +28,7 @@ connection default;
--let $shutdown_timeout=

disconnect con1;
disconnect con2;
connect (con1,localhost,root);
--send SELECT * FROM t1 LOCK IN SHARE MODE

Expand All @@ -35,6 +39,8 @@ let $wait_condition=
info = 'SELECT * FROM t1 LOCK IN SHARE MODE';
--source include/wait_condition.inc

DROP TABLE t2;

--source include/restart_mysqld.inc

disconnect con1;
Expand All @@ -45,3 +51,8 @@ XA ROLLBACK 'x';
SELECT * FROM t1;

DROP TABLE t1;

SET GLOBAL innodb_fast_shutdown=0;
--source include/restart_mysqld.inc

XA ROLLBACK 'y';
6 changes: 3 additions & 3 deletions storage/innobase/buf/buf0flu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3150,7 +3150,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
ulint last_activity = srv_get_activity_count();
ulint last_pages = 0;

while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
while (srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
ulint curr_time = ut_time_ms();

/* The page_cleaner skips sleep if the server is
Expand All @@ -3168,7 +3168,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
ret_sleep = 0;
}

if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
break;
}

Expand Down Expand Up @@ -3335,7 +3335,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
ut_d(buf_flush_page_cleaner_disabled_loop());
}

ut_ad(srv_shutdown_state > 0);
ut_ad(srv_shutdown_state > SRV_SHUTDOWN_INITIATED);
if (srv_fast_shutdown == 2
|| srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
/* In very fast shutdown or when innodb failed to start, we
Expand Down
1 change: 1 addition & 0 deletions storage/innobase/fil/fil0crypt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1076,6 +1076,7 @@ struct rotate_thread_t {
case SRV_SHUTDOWN_EXIT_THREADS:
/* srv_init_abort() must have been invoked */
case SRV_SHUTDOWN_CLEANUP:
case SRV_SHUTDOWN_INITIATED:
return true;
case SRV_SHUTDOWN_FLUSH_PHASE:
case SRV_SHUTDOWN_LAST_PHASE:
Expand Down
3 changes: 1 addition & 2 deletions storage/innobase/fts/fts0opt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2790,8 +2790,7 @@ fts_optimize_thread(
/* Assign number of tables added in fts_slots_t to n_tables */
n_tables = ib_vector_size(fts_slots);

while (!done && srv_shutdown_state == SRV_SHUTDOWN_NONE) {

while (!done && srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
/* If there is no message in the queue and we have tables
to optimize then optimize the tables. */

Expand Down
2 changes: 1 addition & 1 deletion storage/innobase/ibuf/ibuf0ibuf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2614,7 +2614,7 @@ ibuf_merge(
when a slow shutdown is being executed. During a slow
shutdown, the insert buffer merge must be completed. */

if (ibuf->empty && !srv_shutdown_state) {
if (ibuf->empty && srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
return(0);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
} else if (ibuf_debug) {
Expand Down
2 changes: 2 additions & 0 deletions storage/innobase/include/srv0start.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ extern ibool srv_start_raw_disk_in_use;
/** Shutdown state */
enum srv_shutdown_t {
SRV_SHUTDOWN_NONE = 0, /*!< Database running normally */
/** Shutdown initiated in srv_shutdown_bg_undo_sources() */
SRV_SHUTDOWN_INITIATED,
SRV_SHUTDOWN_CLEANUP, /*!< Cleaning up in
logs_empty_and_mark_files_at_shutdown() */
SRV_SHUTDOWN_FLUSH_PHASE,/*!< At this phase the master and the
Expand Down
8 changes: 4 additions & 4 deletions storage/innobase/log/log0log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1203,7 +1203,7 @@ log_write_up_to(
}
}

if (UNIV_UNLIKELY(srv_shutdown_state != SRV_SHUTDOWN_NONE)) {
if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) {
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"InnoDB log write: "
LSN_PF "," LSN_PF,
Expand Down Expand Up @@ -1430,7 +1430,7 @@ log_group_checkpoint(lsn_t end_lsn)
ut_ad(end_lsn == 0 || end_lsn >= log_sys->next_checkpoint_lsn);
ut_ad(end_lsn <= log_sys->lsn);
ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys->lsn
|| srv_shutdown_state != SRV_SHUTDOWN_NONE);
|| srv_shutdown_state > SRV_SHUTDOWN_INITIATED);

DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
" written",
Expand Down Expand Up @@ -1600,7 +1600,7 @@ bool log_checkpoint(bool sync)
if (oldest_lsn
> log_sys->last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
/* Some log has been written since the previous checkpoint. */
} else if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
} else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
/* MariaDB 10.3 startup expects the redo log file to be
logically empty (not even containing a MLOG_CHECKPOINT record)
after a clean shutdown. Perform an extra checkpoint at
Expand All @@ -1625,7 +1625,7 @@ bool log_checkpoint(bool sync)
lsn_t flush_lsn = oldest_lsn;
const lsn_t end_lsn = log_sys->lsn;
const bool do_write
= srv_shutdown_state == SRV_SHUTDOWN_NONE
= srv_shutdown_state <= SRV_SHUTDOWN_INITIATED
|| flush_lsn != end_lsn;

if (fil_names_clear(flush_lsn, do_write)) {
Expand Down
4 changes: 2 additions & 2 deletions storage/innobase/os/os0file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5435,7 +5435,7 @@ os_file_set_size(
? 0 : posix_fallocate(file, current_size,
size - current_size);
} while (err == EINTR
&& srv_shutdown_state == SRV_SHUTDOWN_NONE);
&& srv_shutdown_state <= SRV_SHUTDOWN_INITIATED);

switch (err) {
case 0:
Expand Down Expand Up @@ -5475,7 +5475,7 @@ os_file_set_size(
os_offset_t current_size = os_file_get_size(file);

while (current_size < size
&& srv_shutdown_state == SRV_SHUTDOWN_NONE) {
&& srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
ulint n_bytes;

if (size - current_size < (os_offset_t) buf_size) {
Expand Down
67 changes: 44 additions & 23 deletions storage/innobase/row/row0mysql.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
static ib_mutex_t row_drop_list_mutex;

/** Flag: has row_mysql_drop_list been initialized? */
static ibool row_mysql_drop_list_inited = FALSE;
static bool row_mysql_drop_list_inited;

/*******************************************************************//**
Determine if the given name is a name reserved for MySQL system tables.
Expand Down Expand Up @@ -2572,15 +2572,33 @@ row_drop_tables_for_mysql_in_background(void)

ut_a(!table->can_be_evicted);

bool skip = false;

if (!table->to_be_dropped) {
skip:
dict_table_close(table, FALSE, FALSE);

mutex_enter(&row_drop_list_mutex);
UT_LIST_REMOVE(row_mysql_drop_list, drop);
UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
if (!skip) {
UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
} else {
ut_free(drop);
}
goto next;
}

if (!srv_fast_shutdown && !trx_sys_any_active_transactions()) {
lock_mutex_enter();
skip = UT_LIST_GET_LEN(table->locks) != 0;
lock_mutex_exit();
if (skip) {
/* We cannot drop tables that are locked by XA
PREPARE transactions. */
goto skip;
}
}

char* name = mem_strdup(table->name.m_name);

dict_table_close(table, FALSE, FALSE);
Expand Down Expand Up @@ -3390,15 +3408,15 @@ row_drop_table_for_mysql(
btr_defragment_remove_table(table);
}

/* Remove stats for this table and all of its indexes from the
persistent storage if it exists and if there are stats for this
table in there. This function creates its own trx and commits
it. */
char errstr[1024];
err = dict_stats_drop_table(name, errstr, sizeof(errstr));

if (err != DB_SUCCESS) {
ib::warn() << errstr;
if (UNIV_LIKELY(!strstr(name, "/" TEMP_FILE_PREFIX_INNODB))) {
/* Remove any persistent statistics for this table,
in a separate transaction. */
char errstr[1024];
err = dict_stats_drop_table(name, errstr,
sizeof errstr);
if (err != DB_SUCCESS) {
ib::warn() << errstr;
}
}
}

Expand Down Expand Up @@ -4808,19 +4826,22 @@ row_mysql_init(void)
row_mysql_drop_list,
&row_mysql_drop_t::row_mysql_drop_list);

row_mysql_drop_list_inited = TRUE;
row_mysql_drop_list_inited = true;
}

/*********************************************************************//**
Close this module */
void
row_mysql_close(void)
/*================*/
void row_mysql_close()
{
ut_a(UT_LIST_GET_LEN(row_mysql_drop_list) == 0);

if (row_mysql_drop_list_inited) {
mutex_free(&row_drop_list_mutex);
row_mysql_drop_list_inited = FALSE;
}
ut_ad(!UT_LIST_GET_LEN(row_mysql_drop_list) ||
srv_force_recovery >= SRV_FORCE_NO_BACKGROUND);
if (row_mysql_drop_list_inited)
{
row_mysql_drop_list_inited= false;
mutex_free(&row_drop_list_mutex);

while (row_mysql_drop_t *drop= UT_LIST_GET_FIRST(row_mysql_drop_list))
{
UT_LIST_REMOVE(row_mysql_drop_list, drop);
ut_free(drop);
}
}
}
4 changes: 2 additions & 2 deletions storage/innobase/row/row0purge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,7 @@ row_purge_parse_undo_rec(

dict_table_close(node->table, FALSE, FALSE);
rw_lock_s_unlock(&dict_operation_lock);
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
return(false);
}
os_thread_sleep(1000000);
Expand Down Expand Up @@ -1167,7 +1167,7 @@ row_purge(
ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_S));

if (purged
|| srv_shutdown_state != SRV_SHUTDOWN_NONE
|| srv_shutdown_state > SRV_SHUTDOWN_INITIATED
|| node->vcol_op_failed()) {
return;
}
Expand Down
Loading

0 comments on commit efc70da

Please sign in to comment.