Skip to content

Commit 161e4bf

Browse files
committed
MDEV-25902 Unexpected ER_LOCK_WAIT_TIMEOUT and result
trans_rollback_to_savepoint(): Only release metadata locks (MDL) if the storage engines agree, after the changes were already rolled back. Ever since commit 3792693 and mysql/mysql-server@55ceedb we used to cheat here and always release MDL if the binlog is disabled. MDL are supposed to prevent race conditions between DML and DDL also when no replication is in use. MDL are supposed to be a superset of InnoDB table locks: InnoDB table lock may only exist if the thread also holds MDL on the table name. In the included test case, ROLLBACK TO SAVEPOINT would wrongly release the MDL on both tables and let ALTER TABLE proceed, even though the DML transaction is actually holding locks on the table. Until commit 1bd681c (MDEV-25506) InnoDB worked around the locking violation in a blatantly non-ACID way: If locks exist on a table that is being dropped (in this case, actually a partition of a table that is being rebuilt by ALTER TABLE), InnoDB would move the table (or partition) into a queue, to be dropped after the locks and references had been released. The scenario of commit 3792693 is unaffected by this fix, because mariadb-dump (a.k.a. mysqldump) would use non-locking reads, and the transaction would not be holding any InnoDB locks during the execution of ROLLBACK TO SAVEPOINT. MVCC reads inside InnoDB are only covered by MDL and page latches, not by any table or record locks. FIXME: It would be nice if storage engines were specifically asked which MDL can be released, instead of only offering a choice between all or nothing. InnoDB should be able to release any locks for tables that are no longer in trx_t::mod_tables, except if another transaction had converted some implicit record locks to explicit ones, before the ROLLBACK TO SAVEPOINT had been completed. Reviewed by: Sergei Golubchik
1 parent 8c5c3a4 commit 161e4bf

File tree

3 files changed

+73
-29
lines changed

3 files changed

+73
-29
lines changed

mysql-test/suite/innodb/r/alter_partitioned.result

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,33 @@ CREATE TABLE t1 (id INT PRIMARY KEY, a INT, va INT AS (a) VIRTUAL)
1616
ENGINE=InnoDB PARTITION BY HASH(id) PARTITIONS 2;
1717
ALTER TABLE t1 ADD b INT, ALGORITHM=INSTANT;
1818
DROP TABLE t1;
19+
#
20+
# MDEV-25902 Unexpected ER_LOCK_WAIT_TIMEOUT and result,
21+
# assertion failure err != DB_DUPLICATE_KEY
22+
CREATE TABLE t1 (pk INT PRIMARY KEY) ENGINE=InnoDB;
23+
CREATE TABLE t2 (pk INT PRIMARY KEY) ENGINE=InnoDB;
24+
connect con1,localhost,root,,test;
25+
START TRANSACTION;
26+
INSERT INTO t2 (pk) VALUES (1);
27+
SAVEPOINT sp;
28+
INSERT INTO t1 (pk) VALUES (1);
29+
ROLLBACK TO SAVEPOINT sp;
30+
connection default;
31+
SET lock_wait_timeout=0;
32+
SET innodb_lock_wait_timeout=0;
33+
ALTER TABLE t1 PARTITION BY HASH(pk);
34+
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
35+
SHOW CREATE TABLE t1;
36+
Table Create Table
37+
t1 CREATE TABLE `t1` (
38+
`pk` int(11) NOT NULL,
39+
PRIMARY KEY (`pk`)
40+
) ENGINE=InnoDB DEFAULT CHARSET=latin1
41+
connection con1;
42+
COMMIT;
43+
connection default;
44+
ALTER TABLE t2 PARTITION BY HASH(pk);
45+
disconnect con1;
46+
connection default;
47+
DROP TABLE t1, t2;
48+
# End of 10.6 tests

mysql-test/suite/innodb/t/alter_partitioned.test

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,36 @@ CREATE TABLE t1 (id INT PRIMARY KEY, a INT, va INT AS (a) VIRTUAL)
2222
ENGINE=InnoDB PARTITION BY HASH(id) PARTITIONS 2;
2323
ALTER TABLE t1 ADD b INT, ALGORITHM=INSTANT;
2424
DROP TABLE t1;
25+
26+
--echo #
27+
--echo # MDEV-25902 Unexpected ER_LOCK_WAIT_TIMEOUT and result,
28+
--echo # assertion failure err != DB_DUPLICATE_KEY
29+
30+
CREATE TABLE t1 (pk INT PRIMARY KEY) ENGINE=InnoDB;
31+
CREATE TABLE t2 (pk INT PRIMARY KEY) ENGINE=InnoDB;
32+
33+
--connect (con1,localhost,root,,test)
34+
35+
START TRANSACTION;
36+
INSERT INTO t2 (pk) VALUES (1);
37+
SAVEPOINT sp;
38+
INSERT INTO t1 (pk) VALUES (1);
39+
ROLLBACK TO SAVEPOINT sp;
40+
41+
--connection default
42+
SET lock_wait_timeout=0;
43+
SET innodb_lock_wait_timeout=0;
44+
--error ER_LOCK_WAIT_TIMEOUT
45+
ALTER TABLE t1 PARTITION BY HASH(pk);
46+
47+
SHOW CREATE TABLE t1;
48+
--connection con1
49+
COMMIT;
50+
--connection default
51+
ALTER TABLE t2 PARTITION BY HASH(pk);
52+
# Cleanup
53+
--disconnect con1
54+
--connection default
55+
DROP TABLE t1, t2;
56+
57+
--echo # End of 10.6 tests

sql/transaction.cc

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2-
Copyright (c) 2009, 2020, MariaDB Corporation.
2+
Copyright (c) 2009, 2021, MariaDB Corporation.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -667,33 +667,6 @@ bool trans_rollback_to_savepoint(THD *thd, LEX_CSTRING name)
667667
if (thd->transaction->xid_state.check_has_uncommitted_xa())
668668
DBUG_RETURN(TRUE);
669669

670-
/**
671-
Checking whether it is safe to release metadata locks acquired after
672-
savepoint, if rollback to savepoint is successful.
673-
674-
Whether it is safe to release MDL after rollback to savepoint depends
675-
on storage engines participating in transaction:
676-
677-
- InnoDB doesn't release any row-locks on rollback to savepoint so it
678-
is probably a bad idea to release MDL as well.
679-
- Binary log implementation in some cases (e.g when non-transactional
680-
tables involved) may choose not to remove events added after savepoint
681-
from transactional cache, but instead will write them to binary
682-
log accompanied with ROLLBACK TO SAVEPOINT statement. Since the real
683-
write happens at the end of transaction releasing MDL on tables
684-
mentioned in these events (i.e. acquired after savepoint and before
685-
rollback ot it) can break replication, as concurrent DROP TABLES
686-
statements will be able to drop these tables before events will get
687-
into binary log,
688-
689-
For backward-compatibility reasons we always release MDL if binary
690-
logging is off.
691-
*/
692-
bool mdl_can_safely_rollback_to_savepoint=
693-
(!((WSREP_EMULATE_BINLOG_NNULL(thd) || mysql_bin_log.is_open())
694-
&& thd->variables.sql_log_bin) ||
695-
ha_rollback_to_savepoint_can_release_mdl(thd));
696-
697670
if (ha_rollback_to_savepoint(thd, sv))
698671
res= TRUE;
699672
else if (((thd->variables.option_bits & OPTION_KEEP_LOG) ||
@@ -705,7 +678,15 @@ bool trans_rollback_to_savepoint(THD *thd, LEX_CSTRING name)
705678

706679
thd->transaction->savepoints= sv;
707680

708-
if (!res && mdl_can_safely_rollback_to_savepoint)
681+
if (res)
682+
/* An error occurred during rollback; we cannot release any MDL */;
683+
else if (thd->variables.sql_log_bin &&
684+
(WSREP_EMULATE_BINLOG_NNULL(thd) || mysql_bin_log.is_open()))
685+
/* In some cases (such as with non-transactional tables) we may
686+
choose to preserve events that were added after the SAVEPOINT,
687+
delimiting them by SAVEPOINT and ROLLBACK TO SAVEPOINT statements.
688+
Prematurely releasing MDL on such objects would break replication. */;
689+
else if (ha_rollback_to_savepoint_can_release_mdl(thd))
709690
thd->mdl_context.rollback_to_savepoint(sv->mdl_savepoint);
710691

711692
DBUG_RETURN(MY_TEST(res));

0 commit comments

Comments
 (0)