Skip to content

Commit

Permalink
MDEV-31905 GTID inconsistency
Browse files Browse the repository at this point in the history
This commit fixes a GTID inconsistency that was introduced by mariabackup SST.
The donor node now writes a new info file, donor_galera_info, which is streamed
along with the mariabackup donation to the joiner node. The donor_galera_info
file contains both the GTID and the GTID domain_id, and the joiner uses these to
initialize its GTID state.

The commit adds a new mtr test case, galera_3nodes.galera_gtid_consistency, which
exercises potentially harmful mariabackup SST scenarios. The test also includes a
scenario with IST joining.

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
  • Loading branch information
sjaakola authored and sysprg committed Dec 21, 2023
1 parent 569381d commit c89f769
Show file tree
Hide file tree
Showing 17 changed files with 692 additions and 15 deletions.
1 change: 1 addition & 0 deletions extra/mariabackup/backup_copy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1674,6 +1674,7 @@ ibx_copy_incremental_over_full()
NULL};
const char *sup_files[] = {"xtrabackup_binlog_info",
"xtrabackup_galera_info",
"donor_galera_info",
"xtrabackup_slave_info",
"xtrabackup_info",
"ib_lru_dump",
Expand Down
1 change: 1 addition & 0 deletions extra/mariabackup/backup_copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
/* special files */
#define XTRABACKUP_SLAVE_INFO "xtrabackup_slave_info"
#define XTRABACKUP_GALERA_INFO "xtrabackup_galera_info"
#define XTRABACKUP_DONOR_GALERA_INFO "donor_galera_info"
#define XTRABACKUP_BINLOG_INFO "xtrabackup_binlog_info"
#define XTRABACKUP_INFO "xtrabackup_info"

Expand Down
28 changes: 26 additions & 2 deletions extra/mariabackup/backup_mysql.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1424,6 +1424,7 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection)
{
char *state_uuid = NULL, *state_uuid55 = NULL;
char *last_committed = NULL, *last_committed55 = NULL;
char *domain_id = NULL, *domain_id55 = NULL;
bool result;

mysql_variable status[] = {
Expand All @@ -1434,6 +1435,12 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection)
{NULL, NULL}
};

mysql_variable value[] = {
{"Wsrep_gtid_domain_id", &domain_id},
{"wsrep_gtid_domain_id", &domain_id55},
{NULL, NULL}
};

/* When backup locks are supported by the server, we should skip
creating xtrabackup_galera_info file on the backup stage, because
wsrep_local_state_uuid and wsrep_last_committed will be inconsistent
Expand All @@ -1452,9 +1459,26 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection)
goto cleanup;
}

read_mysql_variables(connection, "SHOW VARIABLES LIKE 'wsrep%'", value, true);

if (domain_id == NULL && domain_id55 == NULL) {
msg("Warning: failed to get master wsrep state from SHOW VARIABLES.");
result = true;
goto cleanup;
}

result = datasink->backup_file_printf(XTRABACKUP_GALERA_INFO,
"%s:%s\n", state_uuid ? state_uuid : state_uuid55,
last_committed ? last_committed : last_committed55);
"%s:%s %s\n", state_uuid ? state_uuid : state_uuid55,
last_committed ? last_committed : last_committed55,
domain_id ? domain_id : domain_id55);

if (result)
{
result= datasink->backup_file_printf(XTRABACKUP_DONOR_GALERA_INFO,
"%s:%s %s\n", state_uuid ? state_uuid : state_uuid55,
last_committed ? last_committed : last_committed55,
domain_id ? domain_id : domain_id55);
}
if (result)
{
write_current_binlog_file(datasink, connection);
Expand Down
10 changes: 6 additions & 4 deletions extra/mariabackup/wsrep.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ permission notice:

/*! Name of file where Galera info is stored on recovery */
#define XB_GALERA_INFO_FILENAME "xtrabackup_galera_info"
#define XB_GALERA_DONOR_INFO_FILENAME "donor_galera_info"

/***********************************************************************
Store Galera checkpoint info in the 'xtrabackup_galera_info' file, if that
Expand All @@ -67,7 +68,7 @@ xb_write_galera_info(bool incremental_prepare)
long long seqno;
MY_STAT statinfo;

/* Do not overwrite existing an existing file to be compatible with
/* Do not overwrite an existing file to be compatible with
servers with older server versions */
if (!incremental_prepare &&
my_stat(XB_GALERA_INFO_FILENAME, &statinfo, MYF(0)) != NULL) {
Expand Down Expand Up @@ -101,10 +102,11 @@ xb_write_galera_info(bool incremental_prepare)

seqno = wsrep_xid_seqno(&xid);

msg("mariabackup: Recovered WSREP position: %s:%lld\n",
uuid_str, (long long) seqno);
msg("mariabackup: Recovered WSREP position: %s:%lld domain_id: %lld\n",
uuid_str, (long long) seqno, (long long)wsrep_get_domain_id());

if (fprintf(fp, "%s:%lld", uuid_str, (long long) seqno) < 0) {
if (fprintf(fp, "%s:%lld %lld", uuid_str, (long long) seqno,
(long long)wsrep_get_domain_id()) < 0) {

die(
"could not write to " XB_GALERA_INFO_FILENAME
Expand Down
3 changes: 3 additions & 0 deletions include/mysql/service_wsrep.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ extern struct wsrep_service_st {
void (*wsrep_thd_kill_LOCK_func)(const MYSQL_THD thd);
void (*wsrep_thd_kill_UNLOCK_func)(const MYSQL_THD thd);
void (*wsrep_thd_set_wsrep_PA_unsafe_func)(MYSQL_THD thd);
uint32 (*wsrep_get_domain_id_func)();
} *wsrep_service;

#define MYSQL_SERVICE_WSREP_INCLUDED
Expand Down Expand Up @@ -139,6 +140,7 @@ extern struct wsrep_service_st {
#define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V)
#define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I)
#define wsrep_thd_set_PA_unsafe(T) wsrep_service->wsrep_thd_set_PA_unsafe_func(T)
#define wsrep_get_domain_id(T) wsrep_service->wsrep_get_domain_id_func(T)
#else

#define MYSQL_SERVICE_WSREP_STATIC_INCLUDED
Expand Down Expand Up @@ -241,5 +243,6 @@ extern "C" void wsrep_report_bf_lock_wait(const THD *thd,
unsigned long long trx_id);
/* declare parallel applying unsafety for the THD */
extern "C" void wsrep_thd_set_PA_unsafe(MYSQL_THD thd);
extern "C" uint32 wsrep_get_domain_id();
#endif
#endif /* MYSQL_SERVICE_WSREP_INCLUDED */
5 changes: 5 additions & 0 deletions mysql-test/include/galera_sst_method.combinations
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[rsync]
wsrep-sst-method=rsync

[mariabackup]
wsrep_sst_method=mariabackup
4 changes: 4 additions & 0 deletions mysql-test/include/galera_sst_method.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# The goal of including this file is to enable galera_sst_method combinations
# (see include/galera_sst_method.combinations)

--source include/have_innodb.inc
1 change: 1 addition & 0 deletions mysql-test/suite/galera_3nodes/r/MDEV-29171.result
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name;
@@wsrep_gtid_domain_id @@wsrep_node_name
100 node3
connection node_3;
connection node_1;
connection node_2;
connection node_1;
connection node_1;
Expand Down
219 changes: 219 additions & 0 deletions mysql-test/suite/galera_3nodes/r/galera_gtid_consistency.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
connection node_2;
connection node_1;
connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3;
connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
set wsrep_sync_wait=0;
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
set wsrep_sync_wait=0;
connection node_1;
CREATE PROCEDURE insert_row (IN node varchar(10), IN repeat_count int)
BEGIN
DECLARE current_num int;
SET current_num = 0;
WHILE current_num < repeat_count do
INSERT INTO t1(node, name) VALUES (node, UUID());
SET current_num = current_num + 1;
END WHILE;
END|
CREATE TABLE t1 (id bigint not null primary key auto_increment, node VARCHAR(10), name VARCHAR(64)) ENGINE=innodb;
# node_1
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2
connection node_2;
# node_2
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2
connection node_3;
# node_3
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2
connection node_1;
CALL insert_row('node1', 500);;
connection node_2;
CALL insert_row('node2', 500);;
connection node_3;
CALL insert_row('node3', 500);;
connection node_2;
# Shutdown node_2, force SST
connection node_2b;
# Wait until node_2 leaves cluster
connection node_1b;
connection node_1;
connection node_3;
connection node_1;
CALL insert_row('node1', 500);
connection node_3;
CALL insert_row('node3', 500);
CREATE TABLE t2(i int primary key) engine=innodb;
connection node_2;
# Restart node_2
# restart
connection node_1b;
# Wait until node_2 is back in cluster
# node2 has joined
# GTID in node1
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2503
connection node_2;
# GTID in node2
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2503
connection node_3;
# GTID in node3
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2503
# Shutdown node_3
connection node_3;
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1';
# Wait until node_3 leaves cluster
connection node_1b;
connection node_1;
CALL insert_row('node1', 50);
CREATE TABLE t3(i int primary key) engine=innodb;
connection node_3;
# Rejoin node_3
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 0';
connection node_1b;
# Wait until node_3 is back in cluster
# node3 has joined
connection node_1;
# GTID in node1
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
connection node_2;
# GTID in node2
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
connection node_3;
# GTID in node3
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
# One by one shutdown all nodes
connection node_3;
# shutdown node_3
connection node_2;
# wait until node_3 is out of cluster
# shutdown node_2
connection node_1;
# wait until node_2 is out of cluster
# shutdown node_1
# Bootstrap from node_1
connection node_1;
# restart: --wsrep_new_cluster
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
ANALYZE TABLE t2;
Table Op Msg_type Msg_text
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
CALL insert_row('node1', 100);;
# Restart node_2
connection node_2;
# restart
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
set wsrep_sync_wait=0;
connection node_1c;
# wait until node_1 and node_2 are in cluster
connection node_2;
ALTER TABLE t2 ADD COLUMN (k int);
CALL insert_row('node2', 100);;
# Restart node_3
connection node_3;
# restart
connection node_1c;
# wait until all nodes are back in cluster
after cluster restart
connection node_2;
connection node_1;
connection node_1;
node1 GTID
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2756
connection node_2;
node2 GTID
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2756
connection node_3;
node3 GTID
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2756
connection node_1;
table size in node1
SELECT COUNT(*) FROM t1;
COUNT(*)
2750
connection node_2;
table size in node2
SELECT COUNT(*) FROM t1;
COUNT(*)
2750
connection node_3;
table size in node3
SELECT COUNT(*) FROM t1;
COUNT(*)
2750
connection node_2;
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
connection node_3;
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
# cleanup
connection node_1;
DROP PROCEDURE insert_row;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
connection node_3;
connection node_2;
disconnect node_3;
disconnect node_2b;
disconnect node_1b;
disconnect node_1c;
Loading

0 comments on commit c89f769

Please sign in to comment.