Skip to content

Commit 96784eb

Browse files
committed
MDEV-7668: Intermediate master groups CREATE TEMPORARY with INSERT, causing parallel replication failure
Parallel replication depends on locking (table locks, row locks, etc.) to prevent two conflicting transactions from running and committing in parallel. But temporary tables are designed to be visible only to one thread, and have no such locking. In the concrete issue, an intermediate master could commit a CREATE TEMPORARY TABLE in the same group commit as in INSERT into that table. Thus, a lower-level master could attempt to run them in parallel and get an error. More generally, we need protection from parallel replication trying to run transactions in parallel that access a common temporary table. This patch simply causes use of a temporary table from parallel replication to wait for all previous transactions to commit, serialising the replication at that point. (A more fine-grained locking could be added later, possibly. However, using temporary tables in statement-based replication is in any case normally undesirable; for example a restart of the server will lose temporary tables and can break replication). Note that row-based replication is not affected, as it does not do any temporary tables on the slave-side. This patch also cleans up the locking around protecting the list of temporary tables in Relay_log_info. This used to take the rli->data_lock at the end of every statement, which is very bad for concurrency. With this patch, the lock is not taken unless temporary tables (with statement-based binlogging) are in use on the slave.
1 parent 040027c commit 96784eb

File tree

4 files changed

+192
-13
lines changed

4 files changed

+192
-13
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
include/rpl_init.inc [topology=1->2->3]
2+
*** MDEV-7668: Intermediate master groups CREATE with INSERT, causing parallel replication failure ***
3+
SET @old_updates= @@GLOBAL.binlog_direct_non_transactional_updates;
4+
SET GLOBAL binlog_direct_non_transactional_updates=OFF;
5+
SET SESSION binlog_direct_non_transactional_updates=OFF;
6+
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
7+
CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
8+
include/stop_slave.inc
9+
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
10+
SET GLOBAL slave_parallel_threads=10;
11+
SET @old_commit_count=@@GLOBAL.binlog_commit_wait_count;
12+
SET GLOBAL binlog_commit_wait_count=2;
13+
SET @old_commit_usec=@@GLOBAL.binlog_commit_wait_usec;
14+
SET GLOBAL binlog_commit_wait_usec=2000000;
15+
SET @old_updates= @@GLOBAL.binlog_direct_non_transactional_updates;
16+
SET GLOBAL binlog_direct_non_transactional_updates=OFF;
17+
SET SESSION binlog_direct_non_transactional_updates=OFF;
18+
CHANGE MASTER TO master_use_gtid=current_pos;
19+
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
20+
include/stop_slave.inc
21+
SET GLOBAL slave_parallel_threads=10;
22+
CHANGE MASTER TO master_use_gtid=current_pos;
23+
BEGIN;
24+
CREATE TEMPORARY TABLE t2 (a INT PRIMARY KEY) ENGINE=MEMORY;
25+
COMMIT;
26+
INSERT INTO t2 VALUES (1);
27+
INSERT INTO t1 SELECT a, a*10 FROM t2;
28+
DROP TABLE t2;
29+
include/save_master_gtid.inc
30+
include/start_slave.inc
31+
include/sync_with_master_gtid.inc
32+
SELECT * FROM t1 ORDER BY a;
33+
a b
34+
1 10
35+
include/start_slave.inc
36+
include/sync_with_master_gtid.inc
37+
SELECT * FROM t1 ORDER BY a;
38+
a b
39+
1 10
40+
include/stop_slave.inc
41+
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
42+
SET GLOBAL binlog_commit_wait_count=@old_commit_count;
43+
SET GLOBAL binlog_commit_wait_usec=@old_commit_usec;
44+
SET GLOBAL binlog_direct_non_transactional_updates= @old_updates;
45+
include/start_slave.inc
46+
include/stop_slave.inc
47+
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
48+
include/start_slave.inc
49+
SET GLOBAL binlog_direct_non_transactional_updates= @old_updates;
50+
CALL mtr.add_suppression("Statement accesses nontransactional table as well as transactional or temporary table");
51+
DROP TABLE t1;
52+
include/rpl_end.inc
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
!include ../my.cnf
2+
3+
[mysqld.1]
4+
log-slave-updates
5+
loose-innodb
6+
7+
[mysqld.2]
8+
log-slave-updates
9+
loose-innodb
10+
11+
[mysqld.3]
12+
log-slave-updates
13+
loose-innodb
14+
15+
[ENV]
16+
SERVER_MYPORT_3= @mysqld.3.port
17+
SERVER_MYSOCK_3= @mysqld.3.socket
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
--source include/have_innodb.inc
2+
--let $rpl_topology=1->2->3
3+
--source include/rpl_init.inc
4+
5+
--echo *** MDEV-7668: Intermediate master groups CREATE with INSERT, causing parallel replication failure ***
6+
7+
--connection server_1
8+
SET @old_updates= @@GLOBAL.binlog_direct_non_transactional_updates;
9+
SET GLOBAL binlog_direct_non_transactional_updates=OFF;
10+
SET SESSION binlog_direct_non_transactional_updates=OFF;
11+
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
12+
CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
13+
--save_master_pos
14+
15+
--connection server_2
16+
--sync_with_master
17+
--save_master_pos
18+
--source include/stop_slave.inc
19+
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
20+
SET GLOBAL slave_parallel_threads=10;
21+
SET @old_commit_count=@@GLOBAL.binlog_commit_wait_count;
22+
SET GLOBAL binlog_commit_wait_count=2;
23+
SET @old_commit_usec=@@GLOBAL.binlog_commit_wait_usec;
24+
SET GLOBAL binlog_commit_wait_usec=2000000;
25+
SET @old_updates= @@GLOBAL.binlog_direct_non_transactional_updates;
26+
SET GLOBAL binlog_direct_non_transactional_updates=OFF;
27+
SET SESSION binlog_direct_non_transactional_updates=OFF;
28+
CHANGE MASTER TO master_use_gtid=current_pos;
29+
30+
--connection server_3
31+
--sync_with_master
32+
--save_master_pos
33+
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
34+
--source include/stop_slave.inc
35+
SET GLOBAL slave_parallel_threads=10;
36+
CHANGE MASTER TO master_use_gtid=current_pos;
37+
38+
39+
--connection server_1
40+
41+
BEGIN;
42+
CREATE TEMPORARY TABLE t2 (a INT PRIMARY KEY) ENGINE=MEMORY;
43+
COMMIT;
44+
INSERT INTO t2 VALUES (1);
45+
INSERT INTO t1 SELECT a, a*10 FROM t2;
46+
DROP TABLE t2;
47+
--source include/save_master_gtid.inc
48+
49+
--connection server_2
50+
--source include/start_slave.inc
51+
--source include/sync_with_master_gtid.inc
52+
SELECT * FROM t1 ORDER BY a;
53+
54+
--connection server_3
55+
--source include/start_slave.inc
56+
--source include/sync_with_master_gtid.inc
57+
SELECT * FROM t1 ORDER BY a;
58+
59+
60+
# Clean up
61+
62+
--connection server_2
63+
--source include/stop_slave.inc
64+
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
65+
SET GLOBAL binlog_commit_wait_count=@old_commit_count;
66+
SET GLOBAL binlog_commit_wait_usec=@old_commit_usec;
67+
SET GLOBAL binlog_direct_non_transactional_updates= @old_updates;
68+
--source include/start_slave.inc
69+
70+
--connection server_3
71+
--source include/stop_slave.inc
72+
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
73+
--source include/start_slave.inc
74+
75+
--connection server_1
76+
SET GLOBAL binlog_direct_non_transactional_updates= @old_updates;
77+
CALL mtr.add_suppression("Statement accesses nontransactional table as well as transactional or temporary table");
78+
DROP TABLE t1;
79+
80+
--source include/rpl_end.inc

sql/sql_base.cc

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -661,20 +661,23 @@ static void mark_temp_tables_as_free_for_reuse(THD *thd)
661661
DBUG_VOID_RETURN;
662662
}
663663

664-
thd->lock_temporary_tables();
665-
for (TABLE *table= thd->temporary_tables ; table ; table= table->next)
664+
if (thd->temporary_tables)
666665
{
667-
if ((table->query_id == thd->query_id) && ! table->open_by_handler)
668-
mark_tmp_table_for_reuse(table);
669-
}
670-
thd->unlock_temporary_tables();
671-
if (thd->rgi_slave)
672-
{
673-
/*
674-
Temporary tables are shared with other by sql execution threads.
675-
As a safety messure, clear the pointer to the common area.
676-
*/
677-
thd->temporary_tables= 0;
666+
thd->lock_temporary_tables();
667+
for (TABLE *table= thd->temporary_tables ; table ; table= table->next)
668+
{
669+
if ((table->query_id == thd->query_id) && ! table->open_by_handler)
670+
mark_tmp_table_for_reuse(table);
671+
}
672+
thd->unlock_temporary_tables();
673+
if (thd->rgi_slave)
674+
{
675+
/*
676+
Temporary tables are shared with other by sql execution threads.
677+
As a safety messure, clear the pointer to the common area.
678+
*/
679+
thd->temporary_tables= 0;
680+
}
678681
}
679682
DBUG_VOID_RETURN;
680683
}
@@ -5586,6 +5589,14 @@ TABLE *open_table_uncached(THD *thd, handlerton *hton,
55865589
(uint) thd->variables.server_id,
55875590
(ulong) thd->variables.pseudo_thread_id));
55885591

5592+
if (add_to_temporary_tables_list)
5593+
{
5594+
/* Temporary tables are not safe for parallel replication. */
5595+
if (thd->rgi_slave && thd->rgi_slave->is_parallel_exec &&
5596+
thd->wait_for_prior_commit())
5597+
return NULL;
5598+
}
5599+
55895600
/* Create the cache_key for temporary tables */
55905601
key_length= create_tmp_table_def_key(thd, cache_key, db, table_name);
55915602

@@ -5822,6 +5833,25 @@ bool open_temporary_table(THD *thd, TABLE_LIST *tl)
58225833
DBUG_RETURN(FALSE);
58235834
}
58245835

5836+
/*
5837+
Temporary tables are not safe for parallel replication. They were
5838+
designed to be visible to one thread only, so have no table locking.
5839+
Thus there is no protection against two conflicting transactions
5840+
committing in parallel and things like that.
5841+
5842+
So for now, anything that uses temporary tables will be serialised
5843+
with anything before it, when using parallel replication.
5844+
5845+
ToDo: We might be able to introduce a reference count or something
5846+
on temp tables, and have slave worker threads wait for it to reach
5847+
zero before being allowed to use the temp table. Might not be worth
5848+
it though, as statement-based replication using temporary tables is
5849+
in any case rather fragile.
5850+
*/
5851+
if (thd->rgi_slave && thd->rgi_slave->is_parallel_exec &&
5852+
thd->wait_for_prior_commit())
5853+
DBUG_RETURN(true);
5854+
58255855
#ifdef WITH_PARTITION_STORAGE_ENGINE
58265856
if (tl->partition_names)
58275857
{

0 commit comments

Comments
 (0)