Skip to content

Commit 03d1346

Browse files
committed
MDEV-29369: rpl.rpl_semi_sync_shutdown_await_ack fails regularly with Result content mismatch
This test was prone to failures for a few reasons, summarized below: 1) MDEV-32168 introduced “only_running_threads=1” to slave_stop.inc, which allowed the stop logic to bypass an attempting-to-reconnect IO thread. That is, the IO thread could realize the master shutdown in `read_event()`, and thereby call into `try_to_reconnect()`. This would leave the IO thread up when the test expected it to be stopped. Fixed by explicitly stopping the IO thread and allowing an error state, as the above case would lead to errno 2003. 2) On slow systems (or those running profiling tools, e.g. MSAN), the waiting-for-ack transaction can complete before the system processes the `SHUTDOWN WAIT FOR ALL SLAVES`. There was shutdown preparation logic in-between the transaction and shutdown itself, which contributes to this problem. This patch also moves this preparation logic before the transaction, so there is less to do in-between the calls. 3) Changed work-around for MDEV-28141 to use debug_sync instead of sleep delay, as it was still possible to hit the bug on very slow systems. 4) Masked MTR variable reset with disable/enable query log Reviewed By: ============ Kristian Nielsen <knielsen@knielsen-hq.org>
1 parent ee89558 commit 03d1346

File tree

5 files changed

+190
-149
lines changed

5 files changed

+190
-149
lines changed

mysql-test/suite/rpl/r/rpl_semi_sync_shutdown_await_ack.result

Lines changed: 90 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,15 @@ call mtr.add_suppression("reply failed");
1515
call mtr.add_suppression("Replication event checksum verification");
1616
call mtr.add_suppression("Relay log write failure");
1717
call mtr.add_suppression("Failed to kill the active semi-sync connection");
18+
set @sav_enabled_server_2= @@GLOBAL.rpl_semi_sync_slave_enabled;
19+
set @sav_server_2_dbug= @@GLOBAL.debug_dbug;
1820
connection server_3;
1921
call mtr.add_suppression("reply failed");
2022
call mtr.add_suppression("Replication event checksum verification");
2123
call mtr.add_suppression("Relay log write failure");
2224
call mtr.add_suppression("Failed to kill the active semi-sync connection");
25+
set @sav_enabled_server_3= @@GLOBAL.rpl_semi_sync_slave_enabled;
26+
set @sav_server_3_dbug= @@GLOBAL.debug_dbug;
2327
connection server_1;
2428
CREATE TABLE t1 (a int);
2529
connection server_2;
@@ -40,15 +44,15 @@ connection server_1;
4044
#-- Enable semi-sync on slaves
4145
let slave_last= 3
4246
connection server_2;
43-
set global rpl_semi_sync_slave_enabled = 1;
4447
include/stop_slave.inc
48+
set global rpl_semi_sync_slave_enabled = 1;
4549
include/start_slave.inc
4650
show status like 'Rpl_semi_sync_slave_status';
4751
Variable_name Value
4852
Rpl_semi_sync_slave_status ON
4953
connection server_3;
50-
set global rpl_semi_sync_slave_enabled = 1;
5154
include/stop_slave.inc
55+
set global rpl_semi_sync_slave_enabled = 1;
5256
include/start_slave.inc
5357
show status like 'Rpl_semi_sync_slave_status';
5458
Variable_name Value
@@ -67,24 +71,20 @@ show status like 'Rpl_semi_sync_master_clients';
6771
Variable_name Value
6872
Rpl_semi_sync_master_clients 2
6973
#-- Prepare servers to simulate delay or error
70-
connection server_1;
71-
SET @@GLOBAL.debug_dbug= "";
7274
connection server_2;
7375
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
7476
connection server_3;
7577
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
7678
#--
7779
#-- Test begins
80+
connection server_1_con2;
81+
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
82+
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
7883
connection server_1;
7984
#-- Begin semi-sync transaction
8085
INSERT INTO t1 VALUES (1);
8186
connection server_1_con2;
8287
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
83-
show status like 'Rpl_semi_sync_master_wait_sessions';
84-
Variable_name Value
85-
Rpl_semi_sync_master_wait_sessions 1
86-
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
87-
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
8888
#-- Begin master shutdown
8989
SHUTDOWN WAIT FOR ALL SLAVES;
9090
connection server_1;
@@ -111,22 +111,19 @@ count(*)=1
111111
#-- Re-synchronize slaves with master and disable semi-sync
112112
#-- Stop slaves
113113
connection server_2;
114-
SET @@GLOBAL.debug_dbug= "";
115-
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
116-
include/stop_slave.inc
114+
include/stop_slave_io.inc
115+
include/stop_slave_sql.inc
116+
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
117+
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
117118
connection server_3;
118-
SET @@GLOBAL.debug_dbug= "";
119-
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
120-
include/stop_slave.inc
119+
include/stop_slave_io.inc
120+
include/stop_slave_sql.inc
121+
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
122+
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
121123
#-- Bring the master back up
122124
connection server_1_con2;
123125
connection default;
124126
connection server_1;
125-
SET @@GLOBAL.debug_dbug= "";
126-
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
127-
show status like 'Rpl_semi_sync_master_status';
128-
Variable_name Value
129-
Rpl_semi_sync_master_status OFF
130127
TRUNCATE TABLE t1;
131128
#-- Bring slaves back up
132129
connection server_2;
@@ -157,15 +154,15 @@ connection server_1;
157154
#-- Enable semi-sync on slaves
158155
let slave_last= 3
159156
connection server_2;
160-
set global rpl_semi_sync_slave_enabled = 1;
161157
include/stop_slave.inc
158+
set global rpl_semi_sync_slave_enabled = 1;
162159
include/start_slave.inc
163160
show status like 'Rpl_semi_sync_slave_status';
164161
Variable_name Value
165162
Rpl_semi_sync_slave_status ON
166163
connection server_3;
167-
set global rpl_semi_sync_slave_enabled = 1;
168164
include/stop_slave.inc
165+
set global rpl_semi_sync_slave_enabled = 1;
169166
include/start_slave.inc
170167
show status like 'Rpl_semi_sync_slave_status';
171168
Variable_name Value
@@ -184,24 +181,20 @@ show status like 'Rpl_semi_sync_master_clients';
184181
Variable_name Value
185182
Rpl_semi_sync_master_clients 2
186183
#-- Prepare servers to simulate delay or error
187-
connection server_1;
188-
SET @@GLOBAL.debug_dbug= "+d,mysqld_delay_kill_threads_phase_1";
189184
connection server_2;
190-
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event";
185+
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
191186
connection server_3;
192-
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event";
187+
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
193188
#--
194189
#-- Test begins
190+
connection server_1_con2;
191+
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
192+
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
195193
connection server_1;
196194
#-- Begin semi-sync transaction
197195
INSERT INTO t1 VALUES (1);
198196
connection server_1_con2;
199197
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
200-
show status like 'Rpl_semi_sync_master_wait_sessions';
201-
Variable_name Value
202-
Rpl_semi_sync_master_wait_sessions 1
203-
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
204-
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
205198
#-- Begin master shutdown
206199
SHUTDOWN WAIT FOR ALL SLAVES;
207200
connection server_1;
@@ -226,24 +219,33 @@ count(*)=0
226219
1
227220
#
228221
#-- Re-synchronize slaves with master and disable semi-sync
222+
#-- FIXME: workaround for MDEV-28141, preventing errored replicas from
223+
# killing their semi-sync connections
224+
connection server_2;
225+
set debug_sync= "now wait_for at_semisync_kill_connection";
226+
set debug_sync= "now signal continue_semisync_kill_connection";
227+
# Wait for debug_sync signal to have been received before issuing RESET
228+
set debug_sync= "reset";
229+
connection server_3;
230+
set debug_sync= "now wait_for at_semisync_kill_connection";
231+
set debug_sync= "now signal continue_semisync_kill_connection";
232+
# Wait for debug_sync signal to have been received before issuing RESET
233+
set debug_sync= "reset";
229234
#-- Stop slaves
230235
connection server_2;
231-
SET @@GLOBAL.debug_dbug= "";
232-
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
233-
include/stop_slave.inc
236+
include/stop_slave_io.inc
237+
include/stop_slave_sql.inc
238+
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
239+
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
234240
connection server_3;
235-
SET @@GLOBAL.debug_dbug= "";
236-
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
237-
include/stop_slave.inc
241+
include/stop_slave_io.inc
242+
include/stop_slave_sql.inc
243+
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
244+
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
238245
#-- Bring the master back up
239246
connection server_1_con2;
240247
connection default;
241248
connection server_1;
242-
SET @@GLOBAL.debug_dbug= "";
243-
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
244-
show status like 'Rpl_semi_sync_master_status';
245-
Variable_name Value
246-
Rpl_semi_sync_master_status OFF
247249
TRUNCATE TABLE t1;
248250
#-- Bring slaves back up
249251
connection server_2;
@@ -275,15 +277,15 @@ connection server_1;
275277
#-- Enable semi-sync on slaves
276278
let slave_last= 3
277279
connection server_2;
278-
set global rpl_semi_sync_slave_enabled = 1;
279280
include/stop_slave.inc
281+
set global rpl_semi_sync_slave_enabled = 1;
280282
include/start_slave.inc
281283
show status like 'Rpl_semi_sync_slave_status';
282284
Variable_name Value
283285
Rpl_semi_sync_slave_status ON
284286
connection server_3;
285-
set global rpl_semi_sync_slave_enabled = 1;
286287
include/stop_slave.inc
288+
set global rpl_semi_sync_slave_enabled = 1;
287289
include/start_slave.inc
288290
show status like 'Rpl_semi_sync_slave_status';
289291
Variable_name Value
@@ -302,24 +304,20 @@ show status like 'Rpl_semi_sync_master_clients';
302304
Variable_name Value
303305
Rpl_semi_sync_master_clients 2
304306
#-- Prepare servers to simulate delay or error
305-
connection server_1;
306-
SET @@GLOBAL.debug_dbug= "+d,mysqld_delay_kill_threads_phase_1";
307307
connection server_2;
308-
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event";
308+
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
309309
connection server_3;
310310
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
311311
#--
312312
#-- Test begins
313+
connection server_1_con2;
314+
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
315+
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
313316
connection server_1;
314317
#-- Begin semi-sync transaction
315318
INSERT INTO t1 VALUES (1);
316319
connection server_1_con2;
317320
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
318-
show status like 'Rpl_semi_sync_master_wait_sessions';
319-
Variable_name Value
320-
Rpl_semi_sync_master_wait_sessions 1
321-
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
322-
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
323321
#-- Begin master shutdown
324322
SHUTDOWN WAIT FOR ALL SLAVES;
325323
connection server_1;
@@ -344,24 +342,28 @@ count(*)=1
344342
1
345343
#
346344
#-- Re-synchronize slaves with master and disable semi-sync
345+
#-- FIXME: workaround for MDEV-28141, preventing errored replicas from
346+
# killing their semi-sync connections
347+
connection server_2;
348+
set debug_sync= "now wait_for at_semisync_kill_connection";
349+
set debug_sync= "now signal continue_semisync_kill_connection";
350+
# Wait for debug_sync signal to have been received before issuing RESET
351+
set debug_sync= "reset";
347352
#-- Stop slaves
348353
connection server_2;
349-
SET @@GLOBAL.debug_dbug= "";
350-
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
351-
include/stop_slave.inc
354+
include/stop_slave_io.inc
355+
include/stop_slave_sql.inc
356+
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
357+
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
352358
connection server_3;
353-
SET @@GLOBAL.debug_dbug= "";
354-
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
355-
include/stop_slave.inc
359+
include/stop_slave_io.inc
360+
include/stop_slave_sql.inc
361+
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
362+
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
356363
#-- Bring the master back up
357364
connection server_1_con2;
358365
connection default;
359366
connection server_1;
360-
SET @@GLOBAL.debug_dbug= "";
361-
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
362-
show status like 'Rpl_semi_sync_master_status';
363-
Variable_name Value
364-
Rpl_semi_sync_master_status OFF
365367
TRUNCATE TABLE t1;
366368
#-- Bring slaves back up
367369
connection server_2;
@@ -399,15 +401,15 @@ connection server_1;
399401
#-- Enable semi-sync on slaves
400402
let slave_last= 3
401403
connection server_2;
402-
set global rpl_semi_sync_slave_enabled = 1;
403404
include/stop_slave.inc
405+
set global rpl_semi_sync_slave_enabled = 1;
404406
include/start_slave.inc
405407
show status like 'Rpl_semi_sync_slave_status';
406408
Variable_name Value
407409
Rpl_semi_sync_slave_status ON
408410
connection server_3;
409-
set global rpl_semi_sync_slave_enabled = 1;
410411
include/stop_slave.inc
412+
set global rpl_semi_sync_slave_enabled = 1;
411413
include/start_slave.inc
412414
show status like 'Rpl_semi_sync_slave_status';
413415
Variable_name Value
@@ -426,24 +428,20 @@ show status like 'Rpl_semi_sync_master_clients';
426428
Variable_name Value
427429
Rpl_semi_sync_master_clients 2
428430
#-- Prepare servers to simulate delay or error
429-
connection server_1;
430-
SET @@GLOBAL.debug_dbug= "+d,mysqld_delay_kill_threads_phase_1";
431431
connection server_2;
432-
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,slave_delay_killing_semisync_connection";
432+
SET @@GLOBAL.debug_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141";
433433
connection server_3;
434434
SET @@GLOBAL.debug_dbug= "+d,simulate_delay_semisync_slave_reply";
435435
#--
436436
#-- Test begins
437+
connection server_1_con2;
438+
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
439+
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
437440
connection server_1;
438441
#-- Begin semi-sync transaction
439442
INSERT INTO t1 VALUES (1);
440443
connection server_1_con2;
441444
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
442-
show status like 'Rpl_semi_sync_master_wait_sessions';
443-
Variable_name Value
444-
Rpl_semi_sync_master_wait_sessions 1
445-
#-- Give enough time after timeout/ack received to query yes_tx/no_tx
446-
SET @@GLOBAL.debug_dbug= "+d,delay_shutdown_phase_2_after_semisync_wait";
447445
#-- Begin master shutdown
448446
SHUTDOWN WAIT FOR ALL SLAVES;
449447
connection server_1;
@@ -468,24 +466,28 @@ count(*)=1
468466
1
469467
#
470468
#-- Re-synchronize slaves with master and disable semi-sync
469+
#-- FIXME: workaround for MDEV-28141, preventing errored replicas from
470+
# killing their semi-sync connections
471+
connection server_2;
472+
set debug_sync= "now wait_for at_semisync_kill_connection";
473+
set debug_sync= "now signal continue_semisync_kill_connection";
474+
# Wait for debug_sync signal to have been received before issuing RESET
475+
set debug_sync= "reset";
471476
#-- Stop slaves
472477
connection server_2;
473-
SET @@GLOBAL.debug_dbug= "";
474-
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
475-
include/stop_slave.inc
478+
include/stop_slave_io.inc
479+
include/stop_slave_sql.inc
480+
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
481+
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_2;
476482
connection server_3;
477-
SET @@GLOBAL.debug_dbug= "";
478-
SET @@GLOBAL.rpl_semi_sync_slave_enabled= 0;
479-
include/stop_slave.inc
483+
include/stop_slave_io.inc
484+
include/stop_slave_sql.inc
485+
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
486+
SET @@GLOBAL.rpl_semi_sync_slave_enabled= @sav_enabled_server_3;
480487
#-- Bring the master back up
481488
connection server_1_con2;
482489
connection default;
483490
connection server_1;
484-
SET @@GLOBAL.debug_dbug= "";
485-
SET @@GLOBAL.rpl_semi_sync_master_enabled = 0;
486-
show status like 'Rpl_semi_sync_master_status';
487-
Variable_name Value
488-
Rpl_semi_sync_master_status OFF
489491
TRUNCATE TABLE t1;
490492
#-- Bring slaves back up
491493
connection server_2;
@@ -509,9 +511,13 @@ COUNT(*)=0
509511
#############################
510512
connection server_2;
511513
include/stop_slave.inc
514+
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_2;
515+
SET @@GLOBAL.debug_dbug= @sav_server_2_dbug;
512516
include/start_slave.inc
513517
connection server_3;
514518
include/stop_slave.inc
519+
SET @@GLOBAL.rpl_semi_sync_slave_enabled = @sav_enabled_server_3;
520+
SET @@GLOBAL.debug_dbug= @sav_server_3_dbug;
515521
include/start_slave.inc
516522
connection server_1;
517523
drop table t1;

0 commit comments

Comments
 (0)