Skip to content

Commit 999c43a

Browse files
committed
MDEV-8725: Assertion `!(thd->rgi_slave && thd-> rgi_slave->did_mark_start_commit)' failed in ha_rollback_trans
The assertion is there to catch cases where we rollback while mark_start_commit() is active. This can allow following event groups to be replicated too early, causing conflicts. But in this case, we have an _explicit_ ROLLBACK event in the binlog, which should not assert. We fix this by delaying the mark_start_commit() in the explicit ROLLBACK case. It seems safest to delay this in ROLLBACK case anyway, and there should be no reason to try to optimise this corner case.
1 parent 5ca061e commit 999c43a

File tree

3 files changed

+73
-8
lines changed

3 files changed

+73
-8
lines changed

mysql-test/suite/rpl/r/rpl_parallel.result

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1689,6 +1689,24 @@ a b
16891689
include/stop_slave.inc
16901690
SET GLOBAL debug_dbug=@old_dbug;
16911691
include/start_slave.inc
1692+
*** MDEV-8725: Assertion on ROLLBACK statement in the binary log ***
1693+
BEGIN;
1694+
INSERT INTO t2 VALUES (200);
1695+
INSERT INTO t1 VALUES (200);
1696+
INSERT INTO t2 VALUES (201);
1697+
ROLLBACK;
1698+
SELECT * FROM t1 WHERE a>=200 ORDER BY a;
1699+
a
1700+
200
1701+
SELECT * FROM t2 WHERE a>=200 ORDER BY a;
1702+
a
1703+
include/save_master_gtid.inc
1704+
include/sync_with_master_gtid.inc
1705+
SELECT * FROM t1 WHERE a>=200 ORDER BY a;
1706+
a
1707+
200
1708+
SELECT * FROM t2 WHERE a>=200 ORDER BY a;
1709+
a
16921710
include/stop_slave.inc
16931711
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
16941712
include/start_slave.inc

mysql-test/suite/rpl/t/rpl_parallel.test

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2369,6 +2369,28 @@ SET GLOBAL debug_dbug=@old_dbug;
23692369

23702370

23712371

2372+
--echo *** MDEV-8725: Assertion on ROLLBACK statement in the binary log ***
2373+
--connection server_1
2374+
# Inject an event group terminated by ROLLBACK, by mixing MyISAM and InnoDB
2375+
# in a transaction. The bug was an assertion on the ROLLBACK due to
2376+
# mark_start_commit() being already called.
2377+
--disable_warnings
2378+
BEGIN;
2379+
INSERT INTO t2 VALUES (200);
2380+
INSERT INTO t1 VALUES (200);
2381+
INSERT INTO t2 VALUES (201);
2382+
ROLLBACK;
2383+
--enable_warnings
2384+
SELECT * FROM t1 WHERE a>=200 ORDER BY a;
2385+
SELECT * FROM t2 WHERE a>=200 ORDER BY a;
2386+
--source include/save_master_gtid.inc
2387+
2388+
--connection server_2
2389+
--source include/sync_with_master_gtid.inc
2390+
SELECT * FROM t1 WHERE a>=200 ORDER BY a;
2391+
SELECT * FROM t2 WHERE a>=200 ORDER BY a;
2392+
2393+
23722394
# Clean up.
23732395
--connection server_2
23742396
--source include/stop_slave.inc

sql/rpl_parallel.cc

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -317,13 +317,26 @@ convert_kill_to_deadlock_error(rpl_group_info *rgi)
317317
}
318318

319319

320-
static bool
320+
/*
321+
Check if an event marks the end of an event group. Returns non-zero if so,
322+
zero otherwise.
323+
324+
In addition, returns 1 if the group is committing, 2 if it is rolling back.
325+
*/
326+
static int
321327
is_group_ending(Log_event *ev, Log_event_type event_type)
322328
{
323-
return event_type == XID_EVENT ||
324-
(event_type == QUERY_EVENT &&
325-
(((Query_log_event *)ev)->is_commit() ||
326-
((Query_log_event *)ev)->is_rollback()));
329+
if (event_type == XID_EVENT)
330+
return 1;
331+
if (event_type == QUERY_EVENT)
332+
{
333+
Query_log_event *qev = (Query_log_event *)ev;
334+
if (qev->is_commit())
335+
return 1;
336+
if (qev->is_rollback())
337+
return 2;
338+
}
339+
return 0;
327340
}
328341

329342

@@ -574,7 +587,7 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
574587
err= 1;
575588
goto err;
576589
}
577-
if (is_group_ending(ev, event_type))
590+
if (is_group_ending(ev, event_type) == 1)
578591
rgi->mark_start_commit();
579592

580593
err= rpt_handle_event(qev, rpt);
@@ -713,7 +726,8 @@ handle_rpl_parallel_thread(void *arg)
713726
Log_event_type event_type;
714727
rpl_group_info *rgi= qev->rgi;
715728
rpl_parallel_entry *entry= rgi->parallel_entry;
716-
bool end_of_group, group_ending;
729+
bool end_of_group;
730+
int group_ending;
717731

718732
next_qev= qev->next;
719733
if (qev->typ == rpl_parallel_thread::queued_event::QUEUED_POS_UPDATE)
@@ -888,7 +902,18 @@ handle_rpl_parallel_thread(void *arg)
888902

889903
group_rgi= rgi;
890904
group_ending= is_group_ending(qev->ev, event_type);
891-
if (group_ending && likely(!rgi->worker_error))
905+
/*
906+
We do not unmark_start_commit() here in case of an explicit ROLLBACK
907+
statement. Such events should be very rare, there is no real reason
908+
to try to group commit them - on the contrary, it seems best to avoid
909+
running them in parallel with following group commits, as with
910+
ROLLBACK events we are already deep in dangerous corner cases with
911+
mix of transactional and non-transactional tables or the like. And
912+
avoiding the mark_start_commit() here allows us to keep an assertion
913+
in ha_rollback_trans() that we do not rollback after doing
914+
mark_start_commit().
915+
*/
916+
if (group_ending == 1 && likely(!rgi->worker_error))
892917
{
893918
/*
894919
Do an extra check for (deadlock) kill here. This helps prevent a

0 commit comments

Comments
 (0)