Skip to content

Commit

Permalink
MDEV-21577 MDL BF-BF conflict
Browse files Browse the repository at this point in the history
Some DDL statements appear to acquire MDL locks for a table referenced by
foreign key constraint from the actual affected table of the DDL statement.
OPTIMIZE, REPAIR and ALTER TABLE belong to this class of DDL statements.

Earlier MariaDB versions did not take this into consideration, and appended
only the affected table to the certification key list in the write set.
Because of missing certification information, it could happen that e.g.
OPTIMIZE table for FK child table could be allowed to apply in parallel
with DML operating on the foreign key parent table, and this could lead to
unhandled MDL lock conflicts between two high priority appliers (BF).

The fix in this patch changes the TOI replication for OPTIMIZE, REPAIR and
ALTER TABLE statements so that before the execution of the respective DDL
statement, there is a foreign key parent search round. This FK parent search
contains the following steps:
* open and lock the affected table (with permissive shared locks)
* iterate over foreign key constraints and collect an array of FK parent
  table names
* close all tables open for the THD and release MDL locks
* do the actual TOI replication with the affected table and FK parent
  table names as key values

The patch also contains a new mtr test for verifying that the above mentioned
DDL statements replicate without problems when operating on an FK child table.
Scenario #1 of the mtr test can be used to check whether some other DDL
(on top of OPTIMIZE, REPAIR and ALTER) could cause similar excessive FK
parent table locking.

Reviewed-by: Aleksey Midenkov <aleksey.midenkov@mariadb.com>
Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
  • Loading branch information
sjaakola authored and Jan Lindström committed Nov 3, 2020
1 parent 5739c77 commit 4d6c661
Show file tree
Hide file tree
Showing 8 changed files with 495 additions and 34 deletions.
9 changes: 7 additions & 2 deletions include/wsrep.h
Expand Up @@ -27,10 +27,10 @@
if (WSREP_ON && WSREP(thd) && wsrep_to_isolation_begin(thd, db_, table_, table_list_)) \
goto wsrep_error_label;

#define WSREP_TO_ISOLATION_BEGIN_ALTER(db_, table_, table_list_, alter_info_) \
#define WSREP_TO_ISOLATION_BEGIN_ALTER(db_, table_, table_list_, alter_info_, fk_tables_) \
if (WSREP(thd) && wsrep_thd_is_local(thd) && \
wsrep_to_isolation_begin(thd, db_, table_, \
table_list_, alter_info_)) \
table_list_, alter_info_, fk_tables_)) \
goto wsrep_error_label;

#define WSREP_TO_ISOLATION_END \
Expand All @@ -46,6 +46,10 @@
if (WSREP(thd) && !thd->lex->no_write_to_binlog \
&& wsrep_to_isolation_begin(thd, db_, table_, table_list_)) goto wsrep_error_label;

/*
  Begin total order isolation (TOI) for a statement, passing a list of
  foreign key tables along with the affected table.

  Calls wsrep_to_isolation_begin(thd, db_, table_, table_list_, NULL, fk_tables)
  only when WSREP(thd) is true and thd->lex->no_write_to_binlog is not set.
  The fifth argument is passed as NULL; it is the position where
  WSREP_TO_ISOLATION_BEGIN_ALTER passes its alter_info_ argument.
  If the call returns non-zero, control jumps to wsrep_error_label.

  The expanding scope must provide a variable named `thd` and a label named
  `wsrep_error_label`.

  NOTE(review): the expansion is a bare `if` statement (not wrapped in
  do { } while(0)), so using it directly before an `else` would capture
  that `else`.
*/
#define WSREP_TO_ISOLATION_BEGIN_FK_TABLES(db_, table_, table_list_, fk_tables) \
if (WSREP(thd) && !thd->lex->no_write_to_binlog \
&& wsrep_to_isolation_begin(thd, db_, table_, table_list_, NULL, fk_tables)) goto wsrep_error_label;

/*
  WSREP_DEBUG: forwards its arguments to WSREP_LOG with sql_print_information
  as the sink, but only when wsrep_debug evaluates to true.
  WSREP_INFO: forwards its arguments to the same sink unconditionally.

  `##__VA_ARGS__` is the GNU preprocessor extension that drops the preceding
  comma when the variadic argument list is empty.

  NOTE(review): WSREP_DEBUG expands to a bare `if` statement, so placing it
  directly before an `else` would capture that `else`.
*/
#define WSREP_DEBUG(...) \
if (wsrep_debug) WSREP_LOG(sql_print_information, ##__VA_ARGS__)
#define WSREP_INFO(...) WSREP_LOG(sql_print_information, ##__VA_ARGS__)
Expand All @@ -69,6 +73,7 @@
#define WSREP_ERROR(...)
#define WSREP_TO_ISOLATION_BEGIN(db_, table_, table_list_) do { } while(0)
/* Empty stub: expands to nothing. Its parameter list matches the five-argument
   definition of this macro (including fk_tables_), so the same call compiles
   against either variant. */
#define WSREP_TO_ISOLATION_BEGIN_ALTER(db_, table_, table_list_, alter_info_, fk_tables_)
#define WSREP_TO_ISOLATION_BEGIN_FK_TABLES(db_, table_, table_list_, fk_tables_)
#define WSREP_TO_ISOLATION_END
#define WSREP_TO_ISOLATION_BEGIN_WRTCHK(db_, table_, table_list_)
#define WSREP_SYNC_WAIT(thd_, before_)
Expand Down
216 changes: 216 additions & 0 deletions mysql-test/suite/galera/r/galera_ddl_fk_conflict.result
@@ -0,0 +1,216 @@
connection node_2;
connection node_1;
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connection node_1a;
SET SESSION wsrep_sync_wait=0;
######################################################################
# Test for OPTIMIZE
######################################################################
######################################################################
#
# Scenario #1: DML working on FK parent table BF aborted by DDL
# over child table
#
######################################################################
connection node_1;
SET SESSION wsrep_sync_wait=0;
CREATE TABLE p (pk INTEGER PRIMARY KEY, f2 CHAR(30));
INSERT INTO p VALUES (1, 'INITIAL VALUE');
INSERT INTO p VALUES (2, 'INITIAL VALUE');
CREATE TABLE c (pk INTEGER PRIMARY KEY, fk INTEGER, FOREIGN KEY (fk) REFERENCES p(pk));
INSERT INTO c VALUES (1,1), (2,2);
connection node_1;
SET AUTOCOMMIT=ON;
START TRANSACTION;
UPDATE p SET f2 = 'TO DEADLOCK' WHERE pk = 1;
connection node_2;
SET SESSION wsrep_sync_wait=0;
OPTIMIZE TABLE c ;
Table Op Msg_type Msg_text
test.c optimize note Table does not support optimize, doing recreate + analyze instead
test.c optimize status OK
connection node_1;
COMMIT;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
connection node_2;
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
######################################################################
#
# Scenario #2: DML working on FK parent table tries to replicate, but
# fails in certification for earlier DDL on child table
#
######################################################################
connection node_1;
BEGIN;
SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync';
connection node_2;
OPTIMIZE TABLE c ;
Table Op Msg_type Msg_text
test.c optimize note Table does not support optimize, doing recreate + analyze instead
test.c optimize status OK
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
connection node_1;
UPDATE p SET f2 = 'TO DEADLOCK' WHERE pk = 1;
COMMIT;
connection node_1a;
SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync';
connection node_1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SELECT 'I deadlocked';
I deadlocked
I deadlocked
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
connection node_2;
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
DROP TABLE c;
DROP TABLE p;
######################################################################
# Test for REPAIR
######################################################################
######################################################################
#
# Scenario #1: DML working on FK parent table BF aborted by DDL
# over child table
#
######################################################################
connection node_1;
SET SESSION wsrep_sync_wait=0;
CREATE TABLE p (pk INTEGER PRIMARY KEY, f2 CHAR(30));
INSERT INTO p VALUES (1, 'INITIAL VALUE');
INSERT INTO p VALUES (2, 'INITIAL VALUE');
CREATE TABLE c (pk INTEGER PRIMARY KEY, fk INTEGER, FOREIGN KEY (fk) REFERENCES p(pk));
INSERT INTO c VALUES (1,1), (2,2);
connection node_1;
SET AUTOCOMMIT=ON;
START TRANSACTION;
UPDATE p SET f2 = 'TO DEADLOCK' WHERE pk = 1;
connection node_2;
SET SESSION wsrep_sync_wait=0;
REPAIR TABLE c ;
Table Op Msg_type Msg_text
test.c repair note The storage engine for the table doesn't support repair
connection node_1;
COMMIT;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
connection node_2;
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
######################################################################
#
# Scenario #2: DML working on FK parent table tries to replicate, but
# fails in certification for earlier DDL on child table
#
######################################################################
connection node_1;
BEGIN;
SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync';
connection node_2;
REPAIR TABLE c ;
Table Op Msg_type Msg_text
test.c repair note The storage engine for the table doesn't support repair
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
connection node_1;
UPDATE p SET f2 = 'TO DEADLOCK' WHERE pk = 1;
COMMIT;
connection node_1a;
SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync';
connection node_1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SELECT 'I deadlocked';
I deadlocked
I deadlocked
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
connection node_2;
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
DROP TABLE c;
DROP TABLE p;
######################################################################
# Test for ALTER ENGINE=INNODB
######################################################################
######################################################################
#
# Scenario #1: DML working on FK parent table BF aborted by DDL
# over child table
#
######################################################################
connection node_1;
SET SESSION wsrep_sync_wait=0;
CREATE TABLE p (pk INTEGER PRIMARY KEY, f2 CHAR(30));
INSERT INTO p VALUES (1, 'INITIAL VALUE');
INSERT INTO p VALUES (2, 'INITIAL VALUE');
CREATE TABLE c (pk INTEGER PRIMARY KEY, fk INTEGER, FOREIGN KEY (fk) REFERENCES p(pk));
INSERT INTO c VALUES (1,1), (2,2);
connection node_1;
SET AUTOCOMMIT=ON;
START TRANSACTION;
UPDATE p SET f2 = 'TO DEADLOCK' WHERE pk = 1;
connection node_2;
SET SESSION wsrep_sync_wait=0;
ALTER TABLE c ENGINE=INNODB;
connection node_1;
COMMIT;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
connection node_2;
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
######################################################################
#
# Scenario #2: DML working on FK parent table tries to replicate, but
# fails in certification for earlier DDL on child table
#
######################################################################
connection node_1;
BEGIN;
SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync';
connection node_2;
ALTER TABLE c ENGINE=INNODB;
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
connection node_1;
UPDATE p SET f2 = 'TO DEADLOCK' WHERE pk = 1;
COMMIT;
connection node_1a;
SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync';
connection node_1;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SELECT 'I deadlocked';
I deadlocked
I deadlocked
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
connection node_2;
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';
EXPECT_2
2
DROP TABLE c;
DROP TABLE p;
114 changes: 114 additions & 0 deletions mysql-test/suite/galera/t/galera_ddl_fk_conflict.inc
@@ -0,0 +1,114 @@
#
# Test for MDL BF-BF lock conflict
# Some DDL statements take an extensive MDL lock on a table that is
# referenced by a foreign key constraint from the actually affected table.
# This extensive MDL lock may cause MDL BF-BF conflict situations if the
# FK parent table is not listed as a certification key in the replication write set,
# i.e. if replication allows such DDL to apply in parallel with regular DML operating
# on the FK parent table.
#
# This test has two scenarios, where DML modifies FK parent table in node 1,
# and offending DDL for FK child table is sent from node 2.
#
# param: $table_admin_command
# DDL table command to test, script will build full SQL statement:
# $table_admin_command TABLE c;
#
# param: $table_admin_command_end
# Optional additional SQL syntax to end the SQL statement, if any
# $table_admin_command TABLE c $table_admin_command_end;
#
# Scenario 1 can be used to test whether a DDL statement has this MDL locking vulnerability.
# Call this test script with some table DDL command in $table_admin_command.
# If scenario 1 passes (in particular, COMMIT fails with ER_LOCK_DEADLOCK),
# then this particular DDL is vulnerable. Scenario 2 should then fail for this DDL
# unless the code has been fixed to append parent table certification keys for it.
#

# Banner naming the DDL statement under test; the text is built from the
# caller-supplied $table_admin_command / $table_admin_command_end variables.
--echo ######################################################################
--echo # Test for $table_admin_command $table_admin_command_end
--echo ######################################################################


--echo ######################################################################
--echo #
--echo # Scenario #1: DML working on FK parent table BF aborted by DDL
--echo # over child table
--echo #
--echo ######################################################################

# Fixture: parent table p with two rows, and child table c whose fk column
# references p(pk). Scenario 2 reuses these tables and drops them at its end.
--connection node_1
SET SESSION wsrep_sync_wait=0;

CREATE TABLE p (pk INTEGER PRIMARY KEY, f2 CHAR(30));
INSERT INTO p VALUES (1, 'INITIAL VALUE');
INSERT INTO p VALUES (2, 'INITIAL VALUE');

CREATE TABLE c (pk INTEGER PRIMARY KEY, fk INTEGER, FOREIGN KEY (fk) REFERENCES p(pk));
INSERT INTO c VALUES (1,1), (2,2);

# Open a transaction on node_1 that modifies a parent row and leave it
# uncommitted while the DDL runs on the other node.
--connection node_1
SET AUTOCOMMIT=ON;
START TRANSACTION;

UPDATE p SET f2 = 'TO DEADLOCK' WHERE pk = 1;

# Run the DDL under test against the FK child table from node_2.
--connection node_2
SET SESSION wsrep_sync_wait=0;
--eval $table_admin_command TABLE c $table_admin_command_end

# The open transaction on node_1 is expected to fail at COMMIT with a
# deadlock error.
--connection node_1
--error ER_LOCK_DEADLOCK
COMMIT;

# The UPDATE must not have taken effect: both parent rows still carry the
# initial value, on both nodes.
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';

--connection node_2
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';

--echo ######################################################################
--echo #
--echo # Scenario #2: DML working on FK parent table tries to replicate, but
--echo # fails in certification for earlier DDL on child table
--echo #
--echo ######################################################################

# Uses tables p and c created in scenario 1.
--connection node_1
BEGIN;

# Block the applier on node #1 and issue DDL on node 2
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_set_sync_point.inc

--connection node_2
--eval $table_admin_command TABLE c $table_admin_command_end

# From the helper connection: wait until the sync point has been reached, then
# clear it, and record the certification failure count expected after the
# upcoming COMMIT (current wsrep_local_cert_failures value plus one).
--connection node_1a
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc
--let $expected_cert_failures = `SELECT VARIABLE_VALUE+1 FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_local_cert_failures'`

# Modify the FK parent row and send COMMIT without waiting for its result;
# the result is collected by --reap further below.
--connection node_1
UPDATE p SET f2 = 'TO DEADLOCK' WHERE pk = 1;
--send COMMIT

# Signal the sync point so the blocked applier can continue.
--connection node_1a
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_signal_sync_point.inc

# Wait until the certification failure counter reaches the expected value.
--let $wait_condition = SELECT VARIABLE_VALUE = $expected_cert_failures FROM information_schema.global_status WHERE VARIABLE_NAME = 'wsrep_local_cert_failures'
--source include/wait_condition.inc

# Collect the result of the pending COMMIT; it must be a deadlock error.
--connection node_1
--error ER_LOCK_DEADLOCK
--reap

SELECT 'I deadlocked';

# The UPDATE must not have taken effect on either node.
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';

--connection node_2
SELECT COUNT(*) AS EXPECT_2 FROM p WHERE f2 = 'INITIAL VALUE';

# Cleanup: drop the FK child before the parent it references.
DROP TABLE c;
DROP TABLE p;
25 changes: 25 additions & 0 deletions mysql-test/suite/galera/t/galera_ddl_fk_conflict.test
@@ -0,0 +1,25 @@
#
# MDL BF-BF lock conflict
#
# Driver for galera_ddl_fk_conflict.inc: sources the include once per DDL
# statement (OPTIMIZE, REPAIR, ALTER ... ENGINE=INNODB), each time against an
# FK child table.
#

--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug_sync.inc
--source include/galera_have_debug_sync.inc

# Extra connection to node 1; the include file switches to it to wait on,
# clear and signal the sync point.
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connection node_1a
SET SESSION wsrep_sync_wait=0;

# $table_admin_command_end is not set for OPTIMIZE and REPAIR, so the include
# evaluates "<command> TABLE c " with nothing after the table name.
--let $table_admin_command = OPTIMIZE
--source galera_ddl_fk_conflict.inc

--let $table_admin_command = REPAIR
--source galera_ddl_fk_conflict.inc

# For ALTER the statement tail is supplied via $table_admin_command_end.
--let $table_admin_command = ALTER
--let $table_admin_command_end = ENGINE=INNODB
--source galera_ddl_fk_conflict.inc

# CHECK and ANALYZE are not affected

0 comments on commit 4d6c661

Please sign in to comment.