Skip to content

Commit

Permalink
Merge MDEV-8302 into 10.1
Browse files Browse the repository at this point in the history
  • Loading branch information
knielsen committed Aug 4, 2015
2 parents 35a0198 + 9b9c5e8 commit dbd2057
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 6 deletions.
40 changes: 40 additions & 0 deletions mysql-test/suite/rpl/r/rpl_parallel.result
Expand Up @@ -1703,6 +1703,46 @@ include/stop_slave.inc
SET GLOBAL debug_dbug= @old_debg;
SET GLOBAL max_relay_log_size= @old_max;
include/start_slave.inc
*** MDEV-8302: Duplicate key with parallel replication ***
include/stop_slave.inc
/* Inject a small sleep which makes the race easier to hit. */
SET @old_dbug=@@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,inject_mdev8302";
INSERT INTO t7 VALUES (100,1), (101,2), (102,3), (103,4), (104,5);
SET @old_dbug= @@SESSION.debug_dbug;
SET @commit_id= 20000;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
SET SESSION debug_dbug=@old_dbug;
SELECT * FROM t7 ORDER BY a;
a b
1 1
2 2
3 86
4 4
5 5
100 5
101 1
102 2
103 3
104 4
include/save_master_gtid.inc
include/start_slave.inc
include/sync_with_master_gtid.inc
SELECT * FROM t7 ORDER BY a;
a b
1 1
2 2
3 86
4 4
5 5
100 5
101 1
102 2
103 3
104 4
include/stop_slave.inc
SET GLOBAL debug_dbug=@old_dbug;
include/start_slave.inc
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc
Expand Down
54 changes: 54 additions & 0 deletions mysql-test/suite/rpl/t/rpl_parallel.test
Expand Up @@ -2374,6 +2374,60 @@ SET GLOBAL max_relay_log_size= @old_max;
--source include/start_slave.inc


--echo *** MDEV-8302: Duplicate key with parallel replication ***

--connection server_2
--source include/stop_slave.inc
/* Inject a small sleep which makes the race easier to hit. */
SET @old_dbug=@@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,inject_mdev8302";


--connection server_1
INSERT INTO t7 VALUES (100,1), (101,2), (102,3), (103,4), (104,5);

# Artificially create a bunch of group commits with conflicting transactions.
# The bug happened when T1 and T2 was in one group commit, and T3 was in the
# following group commit. T2 is a DELETE of a row with same primary key as a
# row that T3 inserts. T1 and T2 can conflict, causing T2 to be deadlock
# killed after starting to commit. The bug was that T2 could roll back before
# doing unmark_start_commit(); this could allow T3 to run before the retry
# of T2, causing duplicate key violation.

SET @old_dbug= @@SESSION.debug_dbug;
SET @commit_id= 20000;
SET SESSION debug_dbug="+d,binlog_force_commit_id";

--let $n = 100
--disable_query_log
while ($n)
{
eval UPDATE t7 SET b=b+1 WHERE a=100+($n MOD 5);
eval DELETE FROM t7 WHERE a=100+($n MOD 5);

SET @commit_id = @commit_id + 1;
eval INSERT INTO t7 VALUES (100+($n MOD 5), $n);
SET @commit_id = @commit_id + 1;
dec $n;
}
--enable_query_log
SET SESSION debug_dbug=@old_dbug;


SELECT * FROM t7 ORDER BY a;
--source include/save_master_gtid.inc


--connection server_2
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SELECT * FROM t7 ORDER BY a;

--source include/stop_slave.inc
SET GLOBAL debug_dbug=@old_dbug;
--source include/start_slave.inc



# Clean up.
--connection server_2
Expand Down
18 changes: 18 additions & 0 deletions sql/handler.cc
Expand Up @@ -1615,6 +1615,24 @@ int ha_rollback_trans(THD *thd, bool all)
DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
trans == &thd->transaction.stmt);

if (is_real_trans)
{
/*
In parallel replication, if we need to rollback during commit, we must
first inform following transactions that we are going to abort our commit
attempt. Otherwise those following transactions can run too early, and
possibly cause replication to fail. See comments in retry_event_group().
There were several bugs with this in the past that were very hard to
track down (MDEV-7458, MDEV-8302). So we add here an assertion for
rollback without signalling following transactions. And in release
builds, we explicitly do the signalling before rolling back.
*/
DBUG_ASSERT(!(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit));
if (thd->rgi_slave && thd->rgi_slave->did_mark_start_commit)
thd->rgi_slave->unmark_start_commit();
}

if (thd->in_sub_stmt)
{
DBUG_ASSERT(0);
Expand Down
2 changes: 0 additions & 2 deletions sql/log_event.cc
Expand Up @@ -4234,7 +4234,6 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi,
"mysql", rpl_gtid_slave_state_table_name.str,
errcode,
thd->get_stmt_da()->message());
trans_rollback(thd);
sub_id= 0;
thd->is_slave_error= 1;
goto end;
Expand Down Expand Up @@ -7365,7 +7364,6 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
"%s.%s: %d: %s",
"mysql", rpl_gtid_slave_state_table_name.str, ec,
thd->get_stmt_da()->message());
trans_rollback(thd);
thd->is_slave_error= 1;
return err;
}
Expand Down
39 changes: 35 additions & 4 deletions sql/rpl_parallel.cc
Expand Up @@ -225,6 +225,11 @@ static void
signal_error_to_sql_driver_thread(THD *thd, rpl_group_info *rgi, int err)
{
rgi->worker_error= err;
/*
In case we get an error during commit, inform following transactions that
we aborted our commit.
*/
rgi->unmark_start_commit();
rgi->cleanup_context(thd, true);
rgi->rli->abort_slave= true;
rgi->rli->stop_for_until= false;
Expand Down Expand Up @@ -377,6 +382,7 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
transaction we deadlocked with will not signal that it started to commit
until after the unmark.
*/
DBUG_EXECUTE_IF("inject_mdev8302", { my_sleep(20000);});
rgi->unmark_start_commit();
DEBUG_SYNC(thd, "rpl_parallel_retry_after_unmark");

Expand Down Expand Up @@ -914,9 +920,24 @@ handle_rpl_parallel_thread(void *arg)
group_ending= is_group_ending(qev->ev, event_type);
if (group_ending && likely(!rgi->worker_error))
{
DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit");
rgi->mark_start_commit();
DEBUG_SYNC(thd, "rpl_parallel_after_mark_start_commit");
/*
Do an extra check for (deadlock) kill here. This helps prevent a
lingering deadlock kill that occured during normal DML processing to
propagate past the mark_start_commit(). If we detect a deadlock only
after mark_start_commit(), we have to unmark, which has at least a
theoretical possibility of leaving a window where it looks like all
transactions in a GCO have started committing, while in fact one
will need to rollback and retry. This is not supposed to be possible
(since there is a deadlock, at least one transaction should be
blocked from reaching commit), but this seems a fragile ensurance,
and there were historically a number of subtle bugs in this area.
*/
if (!thd->killed)
{
DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit");
rgi->mark_start_commit();
DEBUG_SYNC(thd, "rpl_parallel_after_mark_start_commit");
}
}

/*
Expand All @@ -941,7 +962,17 @@ handle_rpl_parallel_thread(void *arg)
});
if (!err)
#endif
err= rpt_handle_event(qev, rpt);
{
if (thd->check_killed())
{
thd->clear_error();
thd->get_stmt_da()->reset_diagnostics_area();
thd->send_kill_message();
err= 1;
}
else
err= rpt_handle_event(qev, rpt);
}
delete_or_keep_event_post_apply(rgi, event_type, qev->ev);
DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_gtid_0_x_100",
err= dbug_simulate_tmp_error(rgi, thd););
Expand Down

0 comments on commit dbd2057

Please sign in to comment.