Skip to content

Commit aa845d1

Browse files
committed
MDEV-6391: GTID binlog state not recovered if mariadb-bin.state is removed
When the server starts up, check if the binlog state file (e.g. master-bin.state) was lost. If it was, recover its contents by scanning the last binlog file, thus avoiding running with a corrupt binlog state.
1 parent ec4ff9a commit aa845d1

File tree

3 files changed

+174
-8
lines changed

3 files changed

+174
-8
lines changed

mysql-test/suite/rpl/r/rpl_gtid_crash.result

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,5 +267,55 @@ a
267267
24
268268
26
269269
27
270+
*** MDEV-6391: GTID binlog state not recovered if mariadb-bin.state is removed ***
271+
include/stop_slave.inc
272+
INSERT INTO t1 VALUES (30);
273+
SET @old_server_id= @@server_id;
274+
SET @old_domain_id= @@gtid_domain_id;
275+
SET SESSION server_id= 10;
276+
INSERT INTO t1 VALUES (31);
277+
INSERT INTO t1 VALUES (32);
278+
SET SESSION gtid_domain_id= 1;
279+
SET SESSION server_id=11;
280+
INSERT INTO t1 VALUES (33);
281+
SET SESSION gtid_domain_id= 2;
282+
INSERT INTO t1 VALUES (34);
283+
SET SESSION server_id= 10;
284+
INSERT INTO t1 VALUES (35);
285+
INSERT INTO t1 VALUES (36);
286+
SET SESSION gtid_domain_id= 0;
287+
SET SESSION server_id= 12;
288+
INSERT INTO t1 VALUES (37);
289+
SET SESSION gtid_domain_id= @old_domain_id;
290+
SET SESSION server_id= @old_server_id;
291+
INSERT INTO t1 VALUES (38);
292+
INSERT INTO t1 VALUES (39);
293+
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
294+
a
295+
30
296+
31
297+
32
298+
33
299+
34
300+
35
301+
36
302+
37
303+
38
304+
39
305+
include/save_master_gtid.inc
306+
include/start_slave.inc
307+
include/sync_with_master_gtid.inc
308+
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
309+
a
310+
30
311+
31
312+
32
313+
33
314+
34
315+
35
316+
36
317+
37
318+
38
319+
39
270320
DROP TABLE t1;
271321
include/rpl_end.inc

mysql-test/suite/rpl/t/rpl_gtid_crash.test

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,77 @@ eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCA
587587
SELECT * from t1 WHERE a > 10 ORDER BY a;
588588

589589

590+
--echo *** MDEV-6391: GTID binlog state not recovered if mariadb-bin.state is removed ***
591+
592+
--connection server_2
593+
--source include/stop_slave.inc
594+
595+
# Do some misc. transactions, stop the master, drop the master-bin.state file.
596+
# Start the master back up, check that binlog state is correct.
597+
598+
--connection server_1
599+
600+
INSERT INTO t1 VALUES (30);
601+
SET @old_server_id= @@server_id;
602+
SET @old_domain_id= @@gtid_domain_id;
603+
604+
SET SESSION server_id= 10;
605+
INSERT INTO t1 VALUES (31);
606+
INSERT INTO t1 VALUES (32);
607+
SET SESSION gtid_domain_id= 1;
608+
SET SESSION server_id=11;
609+
INSERT INTO t1 VALUES (33);
610+
SET SESSION gtid_domain_id= 2;
611+
INSERT INTO t1 VALUES (34);
612+
SET SESSION server_id= 10;
613+
INSERT INTO t1 VALUES (35);
614+
INSERT INTO t1 VALUES (36);
615+
SET SESSION gtid_domain_id= 0;
616+
SET SESSION server_id= 12;
617+
INSERT INTO t1 VALUES (37);
618+
SET SESSION gtid_domain_id= @old_domain_id;
619+
SET SESSION server_id= @old_server_id;
620+
INSERT INTO t1 VALUES (38);
621+
INSERT INTO t1 VALUES (39);
622+
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
623+
--source include/save_master_gtid.inc
624+
625+
--let OLD_STATE= `SELECT @@gtid_binlog_state`
626+
627+
--let $datadir= `SELECT @@datadir`
628+
629+
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
630+
wait
631+
EOF
632+
shutdown_server 10;
633+
--source include/wait_until_disconnected.inc
634+
635+
--remove_file $datadir/master-bin.state
636+
637+
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
638+
restart
639+
EOF
640+
--enable_reconnect
641+
--source include/wait_until_connected_again.inc
642+
643+
--let NEW_STATE= `SELECT @@gtid_binlog_state`
644+
645+
--perl
646+
my $old= $ENV{'OLD_STATE'};
647+
my $new= $ENV{'NEW_STATE'};
648+
# Make them order-independent, for easy comparison.
649+
$old= join(",", sort(split(",", $old)));
650+
$new= join(",", sort(split(",", $new)));
651+
die "ERROR: new binlog state '$new' differs from old '$old'\n"
652+
unless $old eq $new;
653+
EOF
654+
655+
--connection server_2
656+
--source include/start_slave.inc
657+
--source include/sync_with_master_gtid.inc
658+
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
659+
660+
590661
--connection server_1
591662
DROP TABLE t1;
592663

sql/log.cc

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5653,6 +5653,14 @@ MYSQL_BIN_LOG::write_state_to_file()
56535653
}
56545654

56555655

5656+
/*
5657+
Initialize the binlog state from the master-bin.state file, at server startup.
5658+
5659+
Returns:
5660+
0 for success.
5661+
2 for when .state file did not exist.
5662+
1 for other error.
5663+
*/
56565664
int
56575665
MYSQL_BIN_LOG::read_state_from_file()
56585666
{
@@ -5680,7 +5688,7 @@ MYSQL_BIN_LOG::read_state_from_file()
56805688
with GTID enabled. So initialize to empty state.
56815689
*/
56825690
rpl_global_gtid_binlog_state.reset();
5683-
err= 0;
5691+
err= 2;
56845692
goto end;
56855693
}
56865694
}
@@ -9444,7 +9452,17 @@ MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
94449452
if (error != LOG_INFO_EOF)
94459453
sql_print_error("find_log_pos() failed (error: %d)", error);
94469454
else
9455+
{
94479456
error= read_state_from_file();
9457+
if (error == 2)
9458+
{
9459+
/*
9460+
No binlog files and no binlog state is not an error (e.g. just initial
9461+
server start after fresh installation).
9462+
*/
9463+
error= 0;
9464+
}
9465+
}
94489466
return error;
94499467
}
94509468

@@ -9470,15 +9488,42 @@ MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
94709488

94719489
if ((ev= Log_event::read_log_event(&log, 0, &fdle,
94729490
opt_master_verify_checksum)) &&
9473-
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
9474-
ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
9491+
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
94759492
{
9476-
sql_print_information("Recovering after a crash using %s", opt_name);
9477-
error= recover(&log_info, log_name, &log,
9478-
(Format_description_log_event *)ev, do_xa_recovery);
9493+
if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
9494+
{
9495+
sql_print_information("Recovering after a crash using %s", opt_name);
9496+
error= recover(&log_info, log_name, &log,
9497+
(Format_description_log_event *)ev, do_xa_recovery);
9498+
}
9499+
else
9500+
{
9501+
error= read_state_from_file();
9502+
if (error == 2)
9503+
{
9504+
/*
9505+
The binlog exists, but the .state file is missing. This is normal if
9506+
this is the first master start after a major upgrade to 10.0 (with
9507+
GTID support).
9508+
9509+
However, it could also be that the .state file was lost somehow, and
9510+
in this case it could be a serious issue, as we would set the wrong
9511+
binlog state in the next binlog file to be created, and GTID
9512+
processing would be corrupted. A common way for this to happen is copying files
9513+
from an old server to a new one and forgetting the .state file.
9514+
9515+
So in this case, we want to try to recover the binlog state by
9516+
scanning the last binlog file (but we do not need any XA recovery).
9517+
9518+
ToDo: We could avoid one scan at first start after major upgrade, by
9519+
detecting that there is no GTID_LIST event at the start of the
9520+
binlog file, and stopping the scan in that case.
9521+
*/
9522+
error= recover(&log_info, log_name, &log,
9523+
(Format_description_log_event *)ev, false);
9524+
}
9525+
}
94799526
}
9480-
else
9481-
error= read_state_from_file();
94829527

94839528
delete ev;
94849529
end_io_cache(&log);

0 commit comments

Comments
 (0)