-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MDEV-7249: Performance problem in parallel replication with multi-level slaves
Parallel replication (in 10.0 / "conservative" mode) relies on binlog group commits to group transactions that can be safely run in parallel on the slave. The --binlog-commit-wait-count and --binlog-commit-wait-usec options exist to increase the number of commits per group. But in case of conflicts between transactions, this can cause unnecessary delay and reduced throughput, especially on a slave where commit order is fixed. This patch adds a heuristic to reduce this problem. When transaction T1 goes to commit, it will first wait for N transactions to queue up for a group commit. However, if we detect that another transaction T2 is waiting for a row lock held by T1, then we will skip the wait and let T1 commit immediately, releasing its locks and letting T2 continue. On a slave, this avoids the unfortunate situation where T1 is waiting for T2 to join the group commit, but T2 is waiting for T1 to release locks, causing no work to be done for the duration of the --binlog-commit-wait-usec timeout. (The heuristic seems reasonable on the master as well, so it is enabled for all transactions, not just replication transactions).
- Loading branch information
Showing
10 changed files
with
267 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; | ||
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; | ||
SET @old_count= @@GLOBAL.binlog_commit_wait_count; | ||
SET GLOBAL binlog_commit_wait_count= 3; | ||
SET @old_usec= @@GLOBAL.binlog_commit_wait_usec; | ||
SET GLOBAL binlog_commit_wait_usec= 20000000; | ||
SET @a= current_timestamp(); | ||
BEGIN; | ||
INSERT INTO t1 VALUES (1,0); | ||
COMMIT; | ||
INSERT INTO t1 VALUES (1,1); | ||
SET @b= unix_timestamp(current_timestamp()) - unix_timestamp(@a); | ||
SELECT IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")); | ||
IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")) | ||
Ok | ||
ERROR 23000: Duplicate entry '1' for key 'PRIMARY' | ||
SET @a= current_timestamp(); | ||
INSERT INTO t1 VALUES (2,0); | ||
INSERT INTO t1 VALUES (3,0); | ||
INSERT INTO t1 VALUES (4,0); | ||
SET @b= unix_timestamp(current_timestamp()) - unix_timestamp(@a); | ||
SELECT IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")); | ||
IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")) | ||
Ok | ||
SET @a= current_timestamp(); | ||
INSERT INTO t1 VALUES (6,0); | ||
BEGIN; | ||
UPDATE t1 SET b=b+1 WHERE a=1; | ||
UPDATE t1 SET b=b+10 WHERE a=1; | ||
SELECT SLEEP(0.25); | ||
SLEEP(0.25) | ||
0 | ||
UPDATE t1 SET b=b+1 WHERE a=3; | ||
COMMIT; | ||
SET @b= unix_timestamp(current_timestamp()) - unix_timestamp(@a); | ||
SELECT IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")); | ||
IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")) | ||
Ok | ||
SET @a= current_timestamp(); | ||
INSERT INTO t1 VALUES (7,0); | ||
INSERT INTO t1 VALUES (8,0); | ||
SET @b= unix_timestamp(current_timestamp()) - unix_timestamp(@a); | ||
SELECT IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")); | ||
IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")) | ||
Ok | ||
SELECT * FROM t1 ORDER BY a; | ||
a b | ||
1 11 | ||
2 0 | ||
3 1 | ||
4 0 | ||
6 0 | ||
7 0 | ||
8 0 | ||
DROP TABLE t1; | ||
SET GLOBAL binlog_commit_wait_count= @old_count; | ||
SET GLOBAL binlog_commit_wait_usec= @old_usec; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# This test needs the InnoDB storage engine and an enabled binary log. | ||
--source include/have_innodb.inc | ||
--source include/have_log_bin.inc | ||
|
||
# NOTE(review): the reason for switching mysql.gtid_slave_pos to InnoDB is not | ||
# stated here, and the table is not switched back at the end of this script; | ||
# confirm that this is intended. | ||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; | ||
# InnoDB table whose primary key rows are used below to provoke row-lock waits. | ||
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; | ||
|
||
# Save the current group-commit settings so they can be restored at the end of | ||
# the test, then make a commit wait for 3 queued transactions or for a timeout | ||
# of 20000000 microseconds (20 seconds). | ||
SET @old_count= @@GLOBAL.binlog_commit_wait_count; | ||
SET GLOBAL binlog_commit_wait_count= 3; | ||
SET @old_usec= @@GLOBAL.binlog_commit_wait_usec; | ||
SET GLOBAL binlog_commit_wait_usec= 20000000; | ||
|
||
# Three extra connections to the test database, used below to issue statements | ||
# concurrently with send/reap. | ||
connect(con1,localhost,root,,test); | ||
connect(con2,localhost,root,,test); | ||
connect(con3,localhost,root,,test); | ||
|
||
# Check that if T2 goes to wait for a row lock of T1 while T1 is waiting for | ||
# more transactions to arrive for group commit, the commit of T1 will complete | ||
# immediately. | ||
# We test this by setting a very high timeout (20 seconds) and checking that | ||
# less than that much time elapses. | ||
|
||
# Record the start time in @a on the default connection. | ||
--connection default | ||
SET @a= current_timestamp(); | ||
|
||
# T1: insert the row a=1 and send COMMIT without waiting for its result. | ||
--connection con1 | ||
BEGIN; | ||
INSERT INTO t1 VALUES (1,0); | ||
send COMMIT; | ||
|
||
# T2: insert the same primary key value, which conflicts with T1's row. | ||
--connection con2 | ||
send INSERT INTO t1 VALUES (1,1); | ||
|
||
# Collect the result of T1's COMMIT. | ||
--connection con1 | ||
reap; | ||
|
||
# @b is the elapsed time in seconds since @a; the SELECT prints "Ok" only if | ||
# it is below the 20 second timeout. | ||
--connection default | ||
SET @b= unix_timestamp(current_timestamp()) - unix_timestamp(@a); | ||
SELECT IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")); | ||
|
||
# T2's INSERT is expected to fail with a duplicate key error. | ||
--connection con2 | ||
--error ER_DUP_ENTRY | ||
reap; | ||
|
||
|
||
# Test that the commit triggers when sufficient commits have queued up | ||
# (binlog_commit_wait_count was set to 3 above). | ||
--connection default | ||
SET @a= current_timestamp(); | ||
|
||
# First and second autocommit INSERTs are sent without waiting for a result. | ||
--connection con1 | ||
send INSERT INTO t1 VALUES (2,0); | ||
|
||
--connection con2 | ||
send INSERT INTO t1 VALUES (3,0); | ||
|
||
# The third INSERT is issued synchronously on con3. | ||
--connection con3 | ||
INSERT INTO t1 VALUES (4,0); | ||
|
||
# Collect the results of the two INSERTs sent earlier. | ||
--connection con1 | ||
reap; | ||
--connection con2 | ||
reap; | ||
|
||
# Elapsed seconds since @a must again be below the 20 second timeout. | ||
--connection default | ||
SET @b= unix_timestamp(current_timestamp()) - unix_timestamp(@a); | ||
SELECT IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")); | ||
|
||
|
||
# Test that commit triggers immediately if there is already a transaction | ||
# waiting on another transaction that reaches its commit. | ||
|
||
--connection default | ||
SET @a= current_timestamp(); | ||
|
||
# con1 sends an autocommit INSERT without waiting for its result. | ||
--connection con1 | ||
send INSERT INTO t1 VALUES (6,0); | ||
|
||
# con2 opens a transaction and updates the row a=1, keeping it uncommitted. | ||
--connection con2 | ||
BEGIN; | ||
UPDATE t1 SET b=b+1 WHERE a=1; | ||
|
||
# con3 sends an UPDATE of the same row a=1 without waiting for its result. | ||
--connection con3 | ||
send UPDATE t1 SET b=b+10 WHERE a=1; | ||
|
||
--connection con2 | ||
# A small sleep to give con3 time to start waiting on con2. | ||
# The sleep might be too short on a loaded host, but that is not a big problem; | ||
# it only means we will trigger a different code path (con3 waits after con2 | ||
# is ready to commit rather than before), and either path should work the same. | ||
# So different timing will not cause a false positive; at worst a false | ||
# negative. | ||
SELECT SLEEP(0.25); | ||
UPDATE t1 SET b=b+1 WHERE a=3; | ||
COMMIT; | ||
|
||
# Collect the result of con1's INSERT. | ||
--connection con1 | ||
reap; | ||
|
||
# Elapsed seconds since @a must be below the 20 second timeout. | ||
--connection default | ||
SET @b= unix_timestamp(current_timestamp()) - unix_timestamp(@a); | ||
SELECT IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")); | ||
|
||
# Restart the timer for the next check. | ||
--connection default | ||
SET @a= current_timestamp(); | ||
|
||
# Now con3 will be waiting for a following group commit to trigger. | ||
# Two more autocommit INSERTs are issued (con1 asynchronously, con2 | ||
# synchronously), and then the result of con3's earlier UPDATE is collected. | ||
--connection con1 | ||
send INSERT INTO t1 VALUES (7,0); | ||
--connection con2 | ||
INSERT INTO t1 VALUES (8,0); | ||
--connection con3 | ||
reap; | ||
|
||
# Elapsed seconds since @a must be below the 20 second timeout. | ||
--connection default | ||
SET @b= unix_timestamp(current_timestamp()) - unix_timestamp(@a); | ||
SELECT IF(@b < 20, "Ok", CONCAT("Error: too much time elapsed: ", @b, " seconds >= 20")); | ||
|
||
# Show the final table contents in primary key order. | ||
--connection default | ||
SELECT * FROM t1 ORDER BY a; | ||
|
||
# Clean up: drop the test table and restore the saved global settings. | ||
# NOTE(review): con1, con2 and con3 are not explicitly disconnected in the | ||
# visible part of this script; confirm whether that is needed. | ||
--connection default | ||
DROP TABLE t1; | ||
SET GLOBAL binlog_commit_wait_count= @old_count; | ||
SET GLOBAL binlog_commit_wait_usec= @old_usec; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.