Skip to content

Commit

Permalink
MDEV-16962 Assertion failed in open_purge_table upon concurrent ALTER…
Browse files Browse the repository at this point in the history
…/FLUSH

There is a race condition among three threads, resulting in a
deadlock backoff in purge, which is unexpected.

More precisely, the following happens:
T1: NOCOPY ALTER TABLE begins, and eventually it holds MDL_SHARED_NO_WRITE
 lock;
T2: FLUSH TABLES begins. It sets share->tdc->flushed = true
T3: Purge of a record with a virtual column begins. It is going to open
 the table, so an MDL_SHARED_READ lock is acquired.
Since share->tdc->flushed is set, it waits for the TDC purge to end.
T1: is going to elevate MDL LOCK to exclusive and therefore has to set
 other waiters to back off.
T3: receives VICTIM status, reports a DEADLOCK, sets OT_BACKOFF_AND_RETRY
 to Open_table_context::m_action

My fix is to allow opening a table in purge while flushing. It is already
done the same way in other maintenance facilities like REPAIR TABLE.

Another way would be to perform an actual backoff, but Open_table_context
does not allow distinguishing it from other failure types, which still
seem to be unexpected. Doing this would require hacking into the
Open_table_context interface for no benefit compared to passing
MYSQL_OPEN_IGNORE_FLUSH during table open.
  • Loading branch information
FooBarrior committed Apr 27, 2021
1 parent 300253a commit 6ba5f81
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 3 deletions.
30 changes: 29 additions & 1 deletion mysql-test/suite/gcol/r/innodb_virtual_debug_purge.result
Expand Up @@ -232,7 +232,6 @@ set debug_sync= "now WAIT_FOR got_no_such_table TIMEOUT 1";
set global debug_dbug= @saved_dbug;
drop table t1;
set debug_sync=reset;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
#
# MDEV-18546 ASAN heap-use-after-free
# in innobase_get_computed_value / row_purge
Expand Down Expand Up @@ -277,3 +276,32 @@ pk b v
DROP TABLE t1;
SET debug_sync= reset;
set global debug_dbug= @old_dbug;
# MDEV-16962 Assertion '!error || !ot_ctx.can_recover_from_failed_open()'
# failed in open_purge_table upon concurrent ALTER and FLUSH
CREATE TABLE t1 (
pk SERIAL,
c VARCHAR(128),
d DATE,
vd DATE AS (d) VIRTUAL,
PRIMARY KEY(pk),
KEY(vd,c)
) ENGINE=InnoDB;
INSERT IGNORE INTO t1 (pk,c) VALUES (1,'foo');
set debug_sync="now WAIT_FOR purge";
connect con1,localhost,root,,test;
SET GLOBAL innodb_debug_sync="after_open_table_mdl_shared SIGNAL purge WAIT_FOR flush";
SET global debug_dbug="d,ib_purge_virtual_index_callback";
REPLACE INTO t1 (pk,c) VALUES (1,'bar');
connection default;
SET debug_sync="alter_table_before_rename_result_table WAIT_FOR flush";
ALTER TABLE t1 ADD FULLTEXT KEY(c), ALGORITHM=COPY;
connection con1;
SET debug_sync="after_flush_unlock SIGNAL flush ";
FLUSH TABLES;
disconnect con1;
connection default;
InnoDB 0 transactions not purged
DROP TABLE t1;
SET debug_sync= reset;
SET global debug_dbug=@old_dbug;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
50 changes: 49 additions & 1 deletion mysql-test/suite/gcol/t/innodb_virtual_debug_purge.test
Expand Up @@ -322,7 +322,6 @@ drop table t1;

--source include/wait_until_count_sessions.inc
set debug_sync=reset;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;

--echo #
--echo # MDEV-18546 ASAN heap-use-after-free
Expand Down Expand Up @@ -386,3 +385,52 @@ SELECT * FROM t1;
DROP TABLE t1;
SET debug_sync= reset;
set global debug_dbug= @old_dbug;


--echo # MDEV-16962 Assertion '!error || !ot_ctx.can_recover_from_failed_open()'
--echo # failed in open_purge_table upon concurrent ALTER and FLUSH

CREATE TABLE t1 (
pk SERIAL,
c VARCHAR(128),
d DATE,
vd DATE AS (d) VIRTUAL,
PRIMARY KEY(pk),
KEY(vd,c)
) ENGINE=InnoDB;
INSERT IGNORE INTO t1 (pk,c) VALUES (1,'foo');

--send
set debug_sync="now WAIT_FOR purge";
--connect (con1,localhost,root,,test)
# Will pause the InnoDB purge thread inside open_purge_table after the MDL
# is acquired, but before the tdc->flushed check
SET GLOBAL innodb_debug_sync="after_open_table_mdl_shared SIGNAL purge WAIT_FOR flush";

# Workaround to pass trx_undo_roll_ptr_is_insert() in 10.2
SET global debug_dbug="d,ib_purge_virtual_index_callback";

REPLACE INTO t1 (pk,c) VALUES (1,'bar');

--connection default
# wait for MDL acquired by purge
--reap
# MDL_SHARED will be acquired, but ALTER will hang before the MDL upgrade starts.
SET debug_sync="alter_table_before_rename_result_table WAIT_FOR flush";
--send
ALTER TABLE t1 ADD FULLTEXT KEY(c), ALGORITHM=COPY;
--connection con1
# Will hang after tdc->flushed is set, but before emptying tdc cache.
SET debug_sync="after_flush_unlock SIGNAL flush ";
FLUSH TABLES;

# Cleanup
--disconnect con1
--connection default
--reap
--source ../../innodb/include/wait_all_purged.inc
DROP TABLE t1;
SET debug_sync= reset;
SET global debug_dbug=@old_dbug;

SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
2 changes: 1 addition & 1 deletion sql/sql_class.cc
Expand Up @@ -4424,7 +4424,7 @@ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
DBUG_ASSERT(thd->open_tables == NULL);
DBUG_ASSERT(thd->locked_tables_mode < LTM_PRELOCKED);

Open_table_context ot_ctx(thd, 0);
Open_table_context ot_ctx(thd, MYSQL_OPEN_IGNORE_FLUSH);
TABLE_LIST *tl= (TABLE_LIST*)thd->alloc(sizeof(TABLE_LIST));

tl->init_one_table(db, dblen, tb, tblen, tb, TL_READ);
Expand Down

0 comments on commit 6ba5f81

Please sign in to comment.