Skip to content

Commit 6ba5f81

Browse files
committed
MDEV-16962 Assertion failed in open_purge_table upon concurrent ALTER/FLUSH
This is a race condition among three threads that results in a deadlock backoff in purge, which is unexpected. More precisely, the following happens: T1: A NOCOPY ALTER TABLE begins and eventually holds the MDL_SHARED_NO_WRITE lock. T2: FLUSH TABLES begins; it sets share->tdc->flushed = true. T3: Purge of a record with a virtual column begins. It is going to open a table, so an MDL_SHARED_READ lock is acquired. Since share->tdc->flushed is set, it waits for the TDC purge to end. T1: Is going to upgrade its MDL lock to exclusive and therefore has to make the other waiters back off. T3: Receives VICTIM status, reports a DEADLOCK, and sets Open_table_context::m_action to OT_BACKOFF_AND_RETRY. My fix is to allow purge to open the table while a flush is in progress. The same is already done in other maintenance facilities such as REPAIR TABLE. Another way would be to perform an actual backoff, but Open_table_context does not allow distinguishing it from other failure types, which still seem to be unexpected. Doing this would require hacking into the Open_table_context interface for no benefit compared to passing MYSQL_OPEN_IGNORE_FLUSH during table open.
1 parent 300253a commit 6ba5f81

File tree

3 files changed

+79
-3
lines changed

3 files changed

+79
-3
lines changed

mysql-test/suite/gcol/r/innodb_virtual_debug_purge.result

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,6 @@ set debug_sync= "now WAIT_FOR got_no_such_table TIMEOUT 1";
232232
set global debug_dbug= @saved_dbug;
233233
drop table t1;
234234
set debug_sync=reset;
235-
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
236235
#
237236
# MDEV-18546 ASAN heap-use-after-free
238237
# in innobase_get_computed_value / row_purge
@@ -277,3 +276,32 @@ pk b v
277276
DROP TABLE t1;
278277
SET debug_sync= reset;
279278
set global debug_dbug= @old_dbug;
279+
# MDEV-16962 Assertion '!error || !ot_ctx.can_recover_from_failed_open()'
280+
# failed in open_purge_table upon concurrent ALTER and FLUSH
281+
CREATE TABLE t1 (
282+
pk SERIAL,
283+
c VARCHAR(128),
284+
d DATE,
285+
vd DATE AS (d) VIRTUAL,
286+
PRIMARY KEY(pk),
287+
KEY(vd,c)
288+
) ENGINE=InnoDB;
289+
INSERT IGNORE INTO t1 (pk,c) VALUES (1,'foo');
290+
set debug_sync="now WAIT_FOR purge";
291+
connect con1,localhost,root,,test;
292+
SET GLOBAL innodb_debug_sync="after_open_table_mdl_shared SIGNAL purge WAIT_FOR flush";
293+
SET global debug_dbug="d,ib_purge_virtual_index_callback";
294+
REPLACE INTO t1 (pk,c) VALUES (1,'bar');
295+
connection default;
296+
SET debug_sync="alter_table_before_rename_result_table WAIT_FOR flush";
297+
ALTER TABLE t1 ADD FULLTEXT KEY(c), ALGORITHM=COPY;
298+
connection con1;
299+
SET debug_sync="after_flush_unlock SIGNAL flush ";
300+
FLUSH TABLES;
301+
disconnect con1;
302+
connection default;
303+
InnoDB 0 transactions not purged
304+
DROP TABLE t1;
305+
SET debug_sync= reset;
306+
SET global debug_dbug=@old_dbug;
307+
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;

mysql-test/suite/gcol/t/innodb_virtual_debug_purge.test

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,6 @@ drop table t1;
322322

323323
--source include/wait_until_count_sessions.inc
324324
set debug_sync=reset;
325-
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
326325

327326
--echo #
328327
--echo # MDEV-18546 ASAN heap-use-after-free
@@ -386,3 +385,52 @@ SELECT * FROM t1;
386385
DROP TABLE t1;
387386
SET debug_sync= reset;
388387
set global debug_dbug= @old_dbug;
388+
389+
390+
--echo # MDEV-16962 Assertion '!error || !ot_ctx.can_recover_from_failed_open()'
391+
--echo # failed in open_purge_table upon concurrent ALTER and FLUSH
392+
393+
CREATE TABLE t1 (
394+
pk SERIAL,
395+
c VARCHAR(128),
396+
d DATE,
397+
vd DATE AS (d) VIRTUAL,
398+
PRIMARY KEY(pk),
399+
KEY(vd,c)
400+
) ENGINE=InnoDB;
401+
INSERT IGNORE INTO t1 (pk,c) VALUES (1,'foo');
402+
403+
--send
404+
set debug_sync="now WAIT_FOR purge";
405+
--connect (con1,localhost,root,,test)
406+
# Will break innodb purge thread inside open_purge_table after mdl
407+
# acquired, but before tdc->flushed check
408+
SET GLOBAL innodb_debug_sync="after_open_table_mdl_shared SIGNAL purge WAIT_FOR flush";
409+
410+
# Workaround to pass trx_undo_roll_ptr_is_insert() in 10.2
411+
SET global debug_dbug="d,ib_purge_virtual_index_callback";
412+
413+
REPLACE INTO t1 (pk,c) VALUES (1,'bar');
414+
415+
--connection default
416+
# Wait until purge has acquired the MDL
417+
--reap
418+
# MDL_SHARED will be acquired, but execution will hang before the MDL upgrade starts.
419+
SET debug_sync="alter_table_before_rename_result_table WAIT_FOR flush";
420+
--send
421+
ALTER TABLE t1 ADD FULLTEXT KEY(c), ALGORITHM=COPY;
422+
--connection con1
423+
# Will hang after tdc->flushed is set, but before emptying tdc cache.
424+
SET debug_sync="after_flush_unlock SIGNAL flush ";
425+
FLUSH TABLES;
426+
427+
# Cleanup
428+
--disconnect con1
429+
--connection default
430+
--reap
431+
--source ../../innodb/include/wait_all_purged.inc
432+
DROP TABLE t1;
433+
SET debug_sync= reset;
434+
SET global debug_dbug=@old_dbug;
435+
436+
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;

sql/sql_class.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4424,7 +4424,7 @@ TABLE *open_purge_table(THD *thd, const char *db, size_t dblen,
44244424
DBUG_ASSERT(thd->open_tables == NULL);
44254425
DBUG_ASSERT(thd->locked_tables_mode < LTM_PRELOCKED);
44264426

4427-
Open_table_context ot_ctx(thd, 0);
4427+
Open_table_context ot_ctx(thd, MYSQL_OPEN_IGNORE_FLUSH);
44284428
TABLE_LIST *tl= (TABLE_LIST*)thd->alloc(sizeof(TABLE_LIST));
44294429

44304430
tl->init_one_table(db, dblen, tb, tblen, tb, TL_READ);

0 commit comments

Comments
 (0)