From 999c43aeb73f60336b2ebf74821983fee37b96dc Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Wed, 2 Sep 2015 09:57:18 +0200 Subject: [PATCH 1/2] MDEV-8725: Assertion `!(thd->rgi_slave && thd-> rgi_slave->did_mark_start_commit)' failed in ha_rollback_trans The assertion is there to catch cases where we rollback while mark_start_commit() is active. This can allow following event groups to be replicated too early, causing conflicts. But in this case, we have an _explicit_ ROLLBACK event in the binlog, which should not assert. We fix this by delaying the mark_start_commit() in the explicit ROLLBACK case. It seems safest to delay this in ROLLBACK case anyway, and there should be no reason to try to optimise this corner case. --- mysql-test/suite/rpl/r/rpl_parallel.result | 18 ++++++++++ mysql-test/suite/rpl/t/rpl_parallel.test | 22 ++++++++++++ sql/rpl_parallel.cc | 41 +++++++++++++++++----- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index de67f5f0610ff..81243bbba49b8 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -1689,6 +1689,24 @@ a b include/stop_slave.inc SET GLOBAL debug_dbug=@old_dbug; include/start_slave.inc +*** MDEV-8725: Assertion on ROLLBACK statement in the binary log *** +BEGIN; +INSERT INTO t2 VALUES (200); +INSERT INTO t1 VALUES (200); +INSERT INTO t2 VALUES (201); +ROLLBACK; +SELECT * FROM t1 WHERE a>=200 ORDER BY a; +a +200 +SELECT * FROM t2 WHERE a>=200 ORDER BY a; +a +include/save_master_gtid.inc +include/sync_with_master_gtid.inc +SELECT * FROM t1 WHERE a>=200 ORDER BY a; +a +200 +SELECT * FROM t2 WHERE a>=200 ORDER BY a; +a include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index e70dcfa5bb02c..01a46637f07b5 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -2369,6 +2369,28 @@ SET GLOBAL debug_dbug=@old_dbug; +--echo *** MDEV-8725: Assertion on ROLLBACK statement in the binary log *** +--connection server_1 +# Inject an event group terminated by ROLLBACK, by mixing MyISAM and InnoDB +# in a transaction. The bug was an assertion on the ROLLBACK due to +# mark_start_commit() being already called. +--disable_warnings +BEGIN; +INSERT INTO t2 VALUES (200); +INSERT INTO t1 VALUES (200); +INSERT INTO t2 VALUES (201); +ROLLBACK; +--enable_warnings +SELECT * FROM t1 WHERE a>=200 ORDER BY a; +SELECT * FROM t2 WHERE a>=200 ORDER BY a; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +SELECT * FROM t1 WHERE a>=200 ORDER BY a; +SELECT * FROM t2 WHERE a>=200 ORDER BY a; + + # Clean up. --connection server_2 --source include/stop_slave.inc diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 600d2ab41aad5..df6fc92e9bd4a 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -317,13 +317,26 @@ convert_kill_to_deadlock_error(rpl_group_info *rgi) } -static bool +/* + Check if an event marks the end of an event group. Returns non-zero if so, + zero otherwise. + + In addition, returns 1 if the group is committing, 2 if it is rolling back. +*/ +static int is_group_ending(Log_event *ev, Log_event_type event_type) { - return event_type == XID_EVENT || - (event_type == QUERY_EVENT && - (((Query_log_event *)ev)->is_commit() || - ((Query_log_event *)ev)->is_rollback())); + if (event_type == XID_EVENT) + return 1; + if (event_type == QUERY_EVENT) + { + Query_log_event *qev = (Query_log_event *)ev; + if (qev->is_commit()) + return 1; + if (qev->is_rollback()) + return 2; + } + return 0; } @@ -574,7 +587,7 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt, err= 1; goto err; } - if (is_group_ending(ev, event_type)) + if (is_group_ending(ev, event_type) == 1) rgi->mark_start_commit(); err= rpt_handle_event(qev, rpt); @@ -713,7 +726,8 @@ handle_rpl_parallel_thread(void *arg) Log_event_type event_type; rpl_group_info *rgi= qev->rgi; rpl_parallel_entry *entry= rgi->parallel_entry; - bool end_of_group, group_ending; + bool end_of_group; + int group_ending; next_qev= qev->next; if (qev->typ == rpl_parallel_thread::queued_event::QUEUED_POS_UPDATE) @@ -888,7 +902,18 @@ handle_rpl_parallel_thread(void *arg) group_rgi= rgi; group_ending= is_group_ending(qev->ev, event_type); - if (group_ending && likely(!rgi->worker_error)) + /* + We do not unmark_start_commit() here in case of an explicit ROLLBACK + statement. Such events should be very rare, there is no real reason + to try to group commit them - on the contrary, it seems best to avoid + running them in parallel with following group commits, as with + ROLLBACK events we are already deep in dangerous corner cases with + mix of transactional and non-transactional tables or the like. And + avoiding the mark_start_commit() here allows us to keep an assertion + in ha_rollback_trans() that we do not rollback after doing + mark_start_commit(). + */ + if (group_ending == 1 && likely(!rgi->worker_error)) { /* Do an extra check for (deadlock) kill here. This helps prevent a From 09bfaf3a13dabad936198633b968451c17d409b2 Mon Sep 17 00:00:00 2001 From: Kristian Nielsen Date: Wed, 2 Sep 2015 10:08:09 +0200 Subject: [PATCH 2/2] Fix a potential lost wakeup for binlog_commit_wait_usec If a transaction T1 needs to wait for a transaction T2, T2's commit will skip the normal binlog_commit_wait_usec delay, in order not to needlessly stall throughput. This works by checking if T2 is already ready to commit. If so, it is woken up. If not, we set a flag in T2 so that when it gets ready to commit, it will do so immediately. But there was a potential race due to insufficient locking, if T2 gets ready to commit just at the point where T1 does the check. If the race hits, the wakeup (and early commit) of T2 might be lost. The race is only theoretical (from code inspection, no known test case), but seems best to fix it anyway, by properly locking LOCK_prepare_ordered around the check. --- sql/log.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/log.cc b/sql/log.cc index 99d3fb69b1873..6699454695331 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -7664,14 +7664,13 @@ void MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit() { group_commit_entry *head; - mysql_mutex_lock(&LOCK_prepare_ordered); + mysql_mutex_assert_owner(&LOCK_prepare_ordered); head= group_commit_queue; if (head) { head->thd->has_waiter= true; mysql_cond_signal(&COND_prepare_ordered); } - mysql_mutex_unlock(&LOCK_prepare_ordered); } @@ -7690,9 +7689,11 @@ binlog_report_wait_for(THD *thd1, THD *thd2) { if (opt_binlog_commit_wait_count == 0) return; + mysql_mutex_lock(&LOCK_prepare_ordered); thd2->has_waiter= true; if (thd2->waiting_on_group_commit) mysql_bin_log.binlog_trigger_immediate_group_commit(); + mysql_mutex_unlock(&LOCK_prepare_ordered); }