Skip to content

Commit accdabd

Browse files
committed
Merge MDEV-7888 and MDEV-7929 into 10.0.
2 parents e9c10f9 + 3b96134 commit accdabd

File tree

7 files changed

+231
-3
lines changed

7 files changed

+231
-3
lines changed

mysql-test/suite/rpl/r/rpl_parallel.result

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,6 +1509,64 @@ a b
15091509
99 99
15101510
include/stop_slave.inc
15111511
SET GLOBAL slave_transaction_retries= @old_retries;
1512+
SET GLOBAL slave_parallel_threads=10;
1513+
include/start_slave.inc
1514+
*** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang ***
1515+
include/stop_slave.inc
1516+
SET @old_dbug= @@GLOBAL.debug_dbug;
1517+
SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep';
1518+
SET @old_dbug= @@SESSION.debug_dbug;
1519+
SET SESSION debug_dbug="+d,binlog_force_commit_id";
1520+
SET @commit_id= 10000;
1521+
ANALYZE TABLE t2;
1522+
Table Op Msg_type Msg_text
1523+
test.t2 analyze status OK
1524+
INSERT INTO t3 VALUES (120, 0);
1525+
SET @commit_id= 10001;
1526+
INSERT INTO t3 VALUES (121, 0);
1527+
SET SESSION debug_dbug=@old_dbug;
1528+
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
1529+
a b
1530+
120 0
1531+
121 0
1532+
include/save_master_gtid.inc
1533+
include/start_slave.inc
1534+
include/sync_with_master_gtid.inc
1535+
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
1536+
a b
1537+
120 0
1538+
121 0
1539+
include/stop_slave.inc
1540+
SET GLOBAL debug_dbug= @old_debug;
1541+
include/start_slave.inc
1542+
*** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
1543+
include/stop_slave.inc
1544+
SET @old_dbug= @@GLOBAL.debug_dbug;
1545+
SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep';
1546+
SET @old_dbug= @@SESSION.debug_dbug;
1547+
SET SESSION debug_dbug="+d,binlog_force_commit_id";
1548+
SET @old_server_id= @@SESSION.server_id;
1549+
SET SESSION server_id= 100;
1550+
SET @commit_id= 10010;
1551+
ALTER TABLE t1 COMMENT "Hulubulu!";
1552+
SET SESSION server_id= @old_server_id;
1553+
INSERT INTO t3 VALUES (130, 0);
1554+
SET @commit_id= 10011;
1555+
INSERT INTO t3 VALUES (131, 0);
1556+
SET SESSION debug_dbug=@old_dbug;
1557+
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
1558+
a b
1559+
130 0
1560+
131 0
1561+
include/save_master_gtid.inc
1562+
include/start_slave.inc
1563+
include/sync_with_master_gtid.inc
1564+
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
1565+
a b
1566+
130 0
1567+
131 0
1568+
include/stop_slave.inc
1569+
SET GLOBAL debug_dbug= @old_debug;
15121570
include/start_slave.inc
15131571
include/stop_slave.inc
15141572
SET GLOBAL slave_parallel_threads=@old_parallel_threads;

mysql-test/suite/rpl/t/rpl_parallel.test

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2109,6 +2109,100 @@ SELECT * FROM t8 ORDER BY a;
21092109

21102110
--source include/stop_slave.inc
21112111
SET GLOBAL slave_transaction_retries= @old_retries;
2112+
SET GLOBAL slave_parallel_threads=10;
2113+
--source include/start_slave.inc
2114+
2115+
2116+
--echo *** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang ***
2117+
2118+
--connection server_2
2119+
--source include/stop_slave.inc
2120+
SET @old_dbug= @@GLOBAL.debug_dbug;
2121+
SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep';
2122+
2123+
--connection server_1
2124+
# Inject two group commits. The bug was that ANALYZE TABLE would call
2125+
# wakeup_subsequent_commits() too early, allowing the following transaction
2126+
# in the same group to run ahead and binlog and free the GCO. Then we get
2127+
# wrong binlog order and later access freed GCO, which causes lost wakeup
2128+
# of following GCO and thus replication hang.
2129+
# We injected a small sleep in ANALYZE to make the race easier to hit (this
2130+
# can only cause false negatives in versions with the bug, not false positives,
2131+
# so sleep is ok here. And it's in general not possible to trigger reliably
2132+
# the race with debug_sync, since the bugfix makes the race impossible).
2133+
2134+
SET @old_dbug= @@SESSION.debug_dbug;
2135+
SET SESSION debug_dbug="+d,binlog_force_commit_id";
2136+
2137+
# Group commit with cid=10000, two event groups.
2138+
SET @commit_id= 10000;
2139+
ANALYZE TABLE t2;
2140+
INSERT INTO t3 VALUES (120, 0);
2141+
2142+
# Group commit with cid=10001, one event group.
2143+
SET @commit_id= 10001;
2144+
INSERT INTO t3 VALUES (121, 0);
2145+
2146+
SET SESSION debug_dbug=@old_dbug;
2147+
2148+
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
2149+
--source include/save_master_gtid.inc
2150+
2151+
--connection server_2
2152+
--source include/start_slave.inc
2153+
--source include/sync_with_master_gtid.inc
2154+
2155+
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
2156+
2157+
--source include/stop_slave.inc
2158+
SET GLOBAL debug_dbug= @old_debug;
2159+
--source include/start_slave.inc
2160+
2161+
2162+
--echo *** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
2163+
2164+
--connection server_2
2165+
--source include/stop_slave.inc
2166+
SET @old_dbug= @@GLOBAL.debug_dbug;
2167+
SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep';
2168+
2169+
--connection server_1
2170+
# Inject two group commits. The bug was that record_gtid for a
2171+
# non-transactional event group would commit its own transaction, which would
2172+
# cause ha_commit_trans() to call wakeup_subsequent_commits() too early. This
2173+
# in turn lead to access to freed group_commit_orderer object, losing a wakeup
2174+
# and causing slave threads to hang.
2175+
# We inject a small sleep in the corresponding record_gtid() to make the race
2176+
# easier to hit.
2177+
2178+
SET @old_dbug= @@SESSION.debug_dbug;
2179+
SET SESSION debug_dbug="+d,binlog_force_commit_id";
2180+
2181+
# Group commit with cid=10010, two event groups.
2182+
SET @old_server_id= @@SESSION.server_id;
2183+
SET SESSION server_id= 100;
2184+
SET @commit_id= 10010;
2185+
ALTER TABLE t1 COMMENT "Hulubulu!";
2186+
SET SESSION server_id= @old_server_id;
2187+
INSERT INTO t3 VALUES (130, 0);
2188+
2189+
# Group commit with cid=10011, one event group.
2190+
SET @commit_id= 10011;
2191+
INSERT INTO t3 VALUES (131, 0);
2192+
2193+
SET SESSION debug_dbug=@old_dbug;
2194+
2195+
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
2196+
--source include/save_master_gtid.inc
2197+
2198+
--connection server_2
2199+
--source include/start_slave.inc
2200+
--source include/sync_with_master_gtid.inc
2201+
2202+
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
2203+
2204+
--source include/stop_slave.inc
2205+
SET GLOBAL debug_dbug= @old_debug;
21122206
--source include/start_slave.inc
21132207

21142208

sql/log.cc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5857,14 +5857,24 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
58575857
if (direct)
58585858
{
58595859
int res;
5860+
uint64 commit_id= 0;
58605861
DBUG_PRINT("info", ("direct is set"));
58615862
if ((res= thd->wait_for_prior_commit()))
58625863
DBUG_RETURN(res);
58635864
file= &log_file;
58645865
my_org_b_tell= my_b_tell(file);
58655866
mysql_mutex_lock(&LOCK_log);
58665867
prev_binlog_id= current_binlog_id;
5867-
if (write_gtid_event(thd, true, using_trans, 0))
5868+
DBUG_EXECUTE_IF("binlog_force_commit_id",
5869+
{
5870+
const LEX_STRING name= { C_STRING_WITH_LEN("commit_id") };
5871+
bool null_value;
5872+
user_var_entry *entry=
5873+
(user_var_entry*) my_hash_search(&thd->user_vars,
5874+
(uchar*) name.str, name.length);
5875+
commit_id= entry->val_int(&null_value);
5876+
});
5877+
if (write_gtid_event(thd, true, using_trans, commit_id))
58685878
goto err;
58695879
}
58705880
else

sql/rpl_gtid.cc

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
515515
element *elem;
516516
ulonglong thd_saved_option= thd->variables.option_bits;
517517
Query_tables_list lex_backup;
518+
wait_for_commit* suspended_wfc;
518519
DBUG_ENTER("record_gtid");
519520

520521
if (unlikely(!loaded))
@@ -538,6 +539,28 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
538539
DBUG_RETURN(1);
539540
} );
540541

542+
/*
543+
If we are applying a non-transactional event group, we will be committing
544+
here a transaction, but that does not imply that the event group has
545+
completed or has been binlogged. So we should not trigger
546+
wakeup_subsequent_commits() here.
547+
548+
Note: An alternative here could be to put a call to mark_start_commit() in
549+
stmt_done() before the call to record_and_update_gtid(). This would
550+
prevent later calling mark_start_commit() after we have run
551+
wakeup_subsequent_commits() from committing the GTID update transaction
552+
(which must be avoided to avoid accessing freed group_commit_orderer
553+
object). It would also allow following event groups to start slightly
554+
earlier. And in the cases where record_gtid() is called without an active
555+
transaction, the current statement should have been binlogged already, so
556+
binlog order is preserved.
557+
558+
But this is rather subtle, and potentially fragile. And it does not really
559+
seem worth it; non-transactional loads are unlikely to benefit much from
560+
parallel replication in any case. So for now, we go with the simple
561+
suspend/resume of wakeup_subsequent_commits() here in record_gtid().
562+
*/
563+
suspended_wfc= thd->suspend_subsequent_commits();
541564
thd->lex->reset_n_backup_query_tables_list(&lex_backup);
542565
tlist.init_one_table(STRING_WITH_LEN("mysql"),
543566
rpl_gtid_slave_state_table_name.str,
@@ -689,6 +712,12 @@ IF_DBUG(dbug_break:, )
689712
}
690713
thd->lex->restore_backup_query_tables_list(&lex_backup);
691714
thd->variables.option_bits= thd_saved_option;
715+
thd->resume_subsequent_commits(suspended_wfc);
716+
DBUG_EXECUTE_IF("inject_record_gtid_serverid_100_sleep",
717+
{
718+
if (gtid->server_id == 100)
719+
my_sleep(500000);
720+
});
692721
DBUG_RETURN(err);
693722
}
694723

sql/rpl_parallel.cc

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,24 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
171171
/* Now free any GCOs in which all transactions have committed. */
172172
group_commit_orderer *tmp_gco= rgi->gco;
173173
while (tmp_gco &&
174-
(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id))
174+
(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id ||
175+
tmp_gco->next_gco->wait_count > entry->count_committing_event_groups))
176+
{
177+
/*
178+
We must not free a GCO before the wait_count of the following GCO has
179+
been reached and wakeup has been sent. Otherwise we will lose the
180+
wakeup and hang (there were several such bugs in the past).
181+
182+
The intention is that this is ensured already since we only free when
183+
the last event group in the GCO has committed
184+
(tmp_gco->last_sub_id <= sub_id). However, if we have a bug, we have
185+
extra check on next_gco->wait_count to hopefully avoid hanging; we
186+
have here an assertion in debug builds that this check does not in
187+
fact trigger.
188+
*/
189+
DBUG_ASSERT(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id);
175190
tmp_gco= tmp_gco->prev_gco;
191+
}
176192
while (tmp_gco)
177193
{
178194
group_commit_orderer *prev_gco= tmp_gco->prev_gco;

sql/sql_admin.cc

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
320320
int result_code;
321321
int compl_result_code;
322322
bool need_repair_or_alter= 0;
323+
wait_for_commit* suspended_wfc;
323324

324325
DBUG_ENTER("mysql_admin_table");
325326
DBUG_PRINT("enter", ("extra_open_options: %u", extra_open_options));
@@ -337,6 +338,13 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
337338
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
338339
DBUG_RETURN(TRUE);
339340

341+
/*
342+
This function calls trans_commit() during its operation, but that does not
343+
imply that the operation is complete or binlogged. So we have to suspend
344+
temporarily the wakeup_subsequent_commits() calls (if used).
345+
*/
346+
suspended_wfc= thd->suspend_subsequent_commits();
347+
340348
mysql_ha_rm_tables(thd, tables);
341349

342350
/*
@@ -464,7 +472,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
464472
if (!table->table->part_info)
465473
{
466474
my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0));
467-
DBUG_RETURN(TRUE);
475+
goto err2;
468476
}
469477
if (set_part_state(alter_info, table->table->part_info, PART_ADMIN))
470478
{
@@ -1045,6 +1053,8 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
10451053
}
10461054

10471055
my_eof(thd);
1056+
thd->resume_subsequent_commits(suspended_wfc);
1057+
DBUG_EXECUTE_IF("inject_analyze_table_sleep", my_sleep(500000););
10481058
DBUG_RETURN(FALSE);
10491059

10501060
err:
@@ -1058,6 +1068,8 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
10581068
}
10591069
close_thread_tables(thd); // Shouldn't be needed
10601070
thd->mdl_context.release_transactional_locks();
1071+
err2:
1072+
thd->resume_subsequent_commits(suspended_wfc);
10611073
DBUG_RETURN(TRUE);
10621074
}
10631075

sql/sql_class.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3694,6 +3694,15 @@ class THD :public Statement,
36943694
if (wait_for_commit_ptr)
36953695
wait_for_commit_ptr->wakeup_subsequent_commits(wakeup_error);
36963696
}
3697+
wait_for_commit *suspend_subsequent_commits() {
3698+
wait_for_commit *suspended= wait_for_commit_ptr;
3699+
wait_for_commit_ptr= NULL;
3700+
return suspended;
3701+
}
3702+
void resume_subsequent_commits(wait_for_commit *suspended) {
3703+
DBUG_ASSERT(!wait_for_commit_ptr);
3704+
wait_for_commit_ptr= suspended;
3705+
}
36973706

36983707
private:
36993708

0 commit comments

Comments
 (0)