diff --git a/include/rpl_gtid_base.h b/include/rpl_gtid_base.h index e197c71a4f6ab..51d93150b1041 100644 --- a/include/rpl_gtid_base.h +++ b/include/rpl_gtid_base.h @@ -22,6 +22,7 @@ /* Definitions for MariaDB global transaction ID (GTID). */ struct slave_connection_state; +class Domain_id_filter; struct rpl_gtid { diff --git a/mysql-test/suite/rpl/r/rpl_gtid_ignored_domain_ids_validation.result b/mysql-test/suite/rpl/r/rpl_gtid_ignored_domain_ids_validation.result new file mode 100644 index 0000000000000..793d3304842c7 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_gtid_ignored_domain_ids_validation.result @@ -0,0 +1,137 @@ +include/rpl_init.inc [topology=1->2] +# +# Setup: Create tables in two GTID domains on master +# +connection server_1; +SET @@session.gtid_domain_id= 1; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1); +SET @@session.gtid_domain_id= 2; +CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t2 VALUES (100); +connection server_2; +SELECT * FROM t1; +a +1 +SELECT * FROM t2; +a +100 +# +# Test 1: IGNORE_DOMAIN_IDS with purged binlogs should not cause error 1236 +# +connection server_2; +include/stop_slave.inc +connection server_1; +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (200); +INSERT INTO t2 VALUES (201); +FLUSH LOGS; +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (202); +FLUSH LOGS; +include/wait_for_purge.inc "master-bin.000003" +show binary logs; +Log_name File_size +master-bin.000003 # +connection server_2; +CHANGE MASTER TO IGNORE_DOMAIN_IDS=(2), MASTER_USE_GTID=slave_pos; +include/start_slave.inc +connection server_1; +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (2); +connection server_2; +# Slave should have domain 1 data (the domain it cares about) +SELECT * FROM t1 ORDER BY a; +a +1 +2 +connection server_1; +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (250); +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (5); +connection server_2; +# 
Domain 1 row should arrive; domain 2 row should be filtered out +SELECT * FROM t1 ORDER BY a; +a +1 +2 +5 +SELECT MAX(a) < 250 AS domain2_filtered FROM t2; +domain2_filtered +1 +# +# Test 2: DO_DOMAIN_IDS with purged binlogs should not cause error 1236 +# +connection server_2; +include/stop_slave.inc +connection server_1; +connection server_2; +CHANGE MASTER TO IGNORE_DOMAIN_IDS=(), DO_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; +include/start_slave.inc +connection server_1; +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (3); +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (300); +connection server_2; +include/stop_slave.inc +connection server_1; +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (400); +INSERT INTO t2 VALUES (401); +FLUSH LOGS; +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (402); +FLUSH LOGS; +include/wait_for_purge.inc "master-bin.000005" +show binary logs; +Log_name File_size +master-bin.000005 # +connection server_2; +CHANGE MASTER TO DO_DOMAIN_IDS=(1), IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; +include/start_slave.inc +connection server_1; +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (4); +connection server_2; +# Slave should have all domain 1 data +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +5 +connection server_1; +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (500); +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (6); +connection server_2; +# Domain 1 row should arrive; domain 2 row should be filtered +SELECT * FROM t1 ORDER BY a; +a +1 +2 +3 +4 +5 +6 +SELECT MAX(a) < 500 AS domain2_filtered FROM t2; +domain2_filtered +1 +# +# Cleanup +# +connection server_2; +include/stop_slave.inc +connection server_1; +connection server_2; +CHANGE MASTER TO DO_DOMAIN_IDS=(), IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; +include/start_slave.inc +connection server_1; +SET @@session.gtid_domain_id= 0; +DROP TABLE t1, t2; +connection server_2; +include/rpl_end.inc diff --git 
a/mysql-test/suite/rpl/t/rpl_gtid_ignored_domain_ids_validation.test b/mysql-test/suite/rpl/t/rpl_gtid_ignored_domain_ids_validation.test new file mode 100644 index 0000000000000..a9dd9837348c8 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_gtid_ignored_domain_ids_validation.test @@ -0,0 +1,219 @@ +--source include/have_innodb.inc +--let $rpl_topology=1->2 +--source include/rpl_init.inc + +# +# MDEV-28213: A slave's ignored domain ids should not be validated when +# connecting to a master. +# +# When a slave connects to a master using MASTER_USE_GTID=Slave_Pos and the +# master has purged old binlogs, the master validates the slave's GTID state +# against the oldest available binlog's Gtid_list event. If the master's +# Gtid_list contains domains that the slave is configured to ignore (via +# IGNORE_DOMAIN_IDS or DO_DOMAIN_IDS), those domains should NOT be validated. +# Previously this would cause error 1236. +# + +--echo # +--echo # Setup: Create tables in two GTID domains on master +--echo # + +--connection server_1 +SET @@session.gtid_domain_id= 1; +CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1); + +SET @@session.gtid_domain_id= 2; +CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB; +INSERT INTO t2 VALUES (100); + +# Sync slave with master for both domains +--save_master_pos + +--connection server_2 +--sync_with_master + +SELECT * FROM t1; +SELECT * FROM t2; + +--echo # +--echo # Test 1: IGNORE_DOMAIN_IDS with purged binlogs should not cause error 1236 +--echo # + +--connection server_2 +--source include/stop_slave.inc + +--connection server_1 +# While slave is stopped, only advance domain 2 (which will be ignored). +# Domain 1 stays at the same position so the slave can still connect for it. +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (200); +INSERT INTO t2 VALUES (201); + +# Flush logs to rotate the binlog file, then flush again so we have a newer +# file to purge up to. 
+FLUSH LOGS; +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (202); +FLUSH LOGS; + +# Now purge all binlogs except the latest. +# The oldest remaining binlog's Gtid_list will reference both domains. +# Domain 1 will still be at the position the slave knows (up to date). +# Domain 2 will have advanced past the slave's position (stale for slave). +--let $purge_binlogs_to= query_get_value(SHOW MASTER STATUS, File, 1) +--source include/wait_for_purge.inc +--source include/show_binary_logs.inc + +--connection server_2 +# Configure slave to ignore domain 2. The slave has no up-to-date position +# for domain 2 since it was stopped before the latest domain 2 transactions. +# Without the fix, connecting would fail with error 1236 because the master +# validates domain 2's position even though the slave doesn't care about it. +CHANGE MASTER TO IGNORE_DOMAIN_IDS=(2), MASTER_USE_GTID=slave_pos; +--source include/start_slave.inc + +# Verify slave can connect and replicate domain 1 data +--connection server_1 +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (2); +--save_master_pos + +--connection server_2 +--sync_with_master + +--echo # Slave should have domain 1 data (the domain it cares about) +SELECT * FROM t1 ORDER BY a; + +# Verify that new transactions in the ignored domain (2) do not break +# replication. Previously, the connection would never get this far because +# it would error at connect time. 
+--connection server_1 +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (250); +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (5); +--save_master_pos + +--connection server_2 +--sync_with_master + +--echo # Domain 1 row should arrive; domain 2 row should be filtered out +SELECT * FROM t1 ORDER BY a; +# t2 should NOT have the new row since domain 2 is ignored +SELECT MAX(a) < 250 AS domain2_filtered FROM t2; + +--echo # +--echo # Test 2: DO_DOMAIN_IDS with purged binlogs should not cause error 1236 +--echo # + +--connection server_2 +--source include/stop_slave.inc + +# The slave ignored domain 2 during test 1, so its position for domain 2 +# is stale. We need to update it to match the master before we can start +# replication without the ignore filter. +--connection server_1 +--let $master_pos= `SELECT @@GLOBAL.gtid_binlog_pos` +--connection server_2 +--disable_query_log +--eval SET GLOBAL gtid_slave_pos='$master_pos' +--enable_query_log + +# Reset domain filtering and sync up +CHANGE MASTER TO IGNORE_DOMAIN_IDS=(), DO_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; +--source include/start_slave.inc + +# Sync up everything +--connection server_1 +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (3); +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (300); +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc + +--connection server_1 +# While slave is stopped, only advance domain 2 (which will NOT be in DO list). +# Domain 1 stays current so the slave can connect for it. 
+SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (400); +INSERT INTO t2 VALUES (401); + +# Create new binlog and purge old ones +FLUSH LOGS; +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (402); +FLUSH LOGS; + +# Purge all old binlogs +--let $purge_binlogs_to= query_get_value(SHOW MASTER STATUS, File, 1) +--source include/wait_for_purge.inc +--source include/show_binary_logs.inc + +--connection server_2 +# Configure slave with DO_DOMAIN_IDS=(1) -- only replicate domain 1. +# This means domain 2 should be ignored during validation. +# Without the fix, this would fail because the master's oldest binlog +# references domain 2 but slave may not have the latest position for it. +CHANGE MASTER TO DO_DOMAIN_IDS=(1), IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; +--source include/start_slave.inc + +--connection server_1 +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (4); +--save_master_pos + +--connection server_2 +--sync_with_master + +--echo # Slave should have all domain 1 data +SELECT * FROM t1 ORDER BY a; + +# Verify that new transactions in the non-DO domain (2) do not break +# replication when DO_DOMAIN_IDS=(1) is active. +--connection server_1 +SET @@session.gtid_domain_id= 2; +INSERT INTO t2 VALUES (500); +SET @@session.gtid_domain_id= 1; +INSERT INTO t1 VALUES (6); +--save_master_pos + +--connection server_2 +--sync_with_master + +--echo # Domain 1 row should arrive; domain 2 row should be filtered +SELECT * FROM t1 ORDER BY a; +SELECT MAX(a) < 500 AS domain2_filtered FROM t2; + +--echo # +--echo # Cleanup +--echo # + +--connection server_2 +--source include/stop_slave.inc + +# Update slave's GTID position to match master for all domains, +# since domain 2 was not replicated during test 2. 
+--connection server_1 +--let $master_pos= `SELECT @@GLOBAL.gtid_binlog_pos` +--connection server_2 +--disable_query_log +--eval SET GLOBAL gtid_slave_pos='$master_pos' +--enable_query_log + +CHANGE MASTER TO DO_DOMAIN_IDS=(), IGNORE_DOMAIN_IDS=(), MASTER_USE_GTID=slave_pos; +--source include/start_slave.inc + +--connection server_1 +SET @@session.gtid_domain_id= 0; +DROP TABLE t1, t2; +--save_master_pos + +--connection server_2 +--sync_with_master + +--source include/rpl_end.inc diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 0eb48e4b0c2a3..4a6ee2b2028ac 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -27,6 +27,7 @@ #include "sql_parse.h" #include "key.h" #include "rpl_rli.h" +#include "rpl_mi.h" #include "slave.h" #include "log_event.h" #include "transaction.h" @@ -1720,6 +1721,18 @@ rpl_binlog_state_base::is_before_pos(slave_connection_state *pos) OR we are identical, but there's some other server_id after) THEN that position lies before our state. */ + /* + If the slave is configured to ignore this domain (MDEV-28213), + skip the check. The slave doesn't care about this domain's position. + */ +#ifdef HAVE_REPLICATION + if (pos->domain_filter) + { + pos->domain_filter->do_filter(e->gtid.domain_id); + if (pos->domain_filter->is_group_filtered()) + continue; + } +#endif element *elem; if ((elem= (element *)my_hash_search(&hash, (const uchar *)&e->gtid.domain_id, @@ -1744,7 +1757,21 @@ rpl_binlog_state_base::is_before_pos(slave_connection_state *pos) const element *elem= (const element *) my_hash_element(&hash, i); if (likely(elem->hash.records > 0) && !pos->find(elem->domain_id)) + { + /* + If the slave is configured to ignore this domain (MDEV-28213), + skip the check rather than returning false. 
+ */ +#ifdef HAVE_REPLICATION + if (pos->domain_filter) + { + pos->domain_filter->do_filter(elem->domain_id); + if (pos->domain_filter->is_group_filtered()) + continue; + } +#endif return false; + } } /* Nothing in our state lies after anything in the position. */ @@ -2360,6 +2387,9 @@ rpl_binlog_state::drop_domain(DYNAMIC_ARRAY *ids, slave_connection_state::slave_connection_state() { +#ifdef HAVE_REPLICATION + domain_filter= NULL; +#endif my_hash_init(PSI_INSTRUMENT_ME, &hash, &my_charset_bin, 32, offsetof(entry, gtid) + offsetof(rpl_gtid, domain_id), sizeof(rpl_gtid::domain_id), NULL, my_free, HASH_UNIQUE); @@ -2369,6 +2399,9 @@ slave_connection_state::slave_connection_state() slave_connection_state::~slave_connection_state() { +#ifdef HAVE_REPLICATION + delete domain_filter; +#endif my_hash_free(&hash); delete_dynamic(>id_sort_array); } diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index 743bcfe67791e..14d34b3deda76 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -357,6 +357,15 @@ struct slave_connection_state /* Auxiliary buffer to sort gtid list. */ DYNAMIC_ARRAY gtid_sort_array; +#ifdef HAVE_REPLICATION + /* + Domain ID filter sent by the slave (via IGNORE_DOMAIN_IDS / DO_DOMAIN_IDS). + NULL until the slave actually sends domain ID lists (MDEV-28213). + Owned by this struct; deleted in ~slave_connection_state(). 
+ */ + Domain_id_filter *domain_filter; +#endif + slave_connection_state(); ~slave_connection_state(); diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc index abdc3d967bafc..1a456e672f269 100644 --- a/sql/rpl_mi.cc +++ b/sql/rpl_mi.cc @@ -1368,6 +1368,11 @@ Domain_id_filter::Domain_id_filter() : m_filter(false) } } +const LEX_CSTRING Domain_id_filter::var_name_ignore= + { STRING_WITH_LEN("slave_connect_state_domain_ids_ignore") }; +const LEX_CSTRING Domain_id_filter::var_name_do= + { STRING_WITH_LEN("slave_connect_state_domain_ids_do") }; + Domain_id_filter::~Domain_id_filter() { for (int i= DO_DOMAIN_IDS; i <= IGNORE_DOMAIN_IDS; i ++) diff --git a/sql/rpl_mi.h b/sql/rpl_mi.h index a3e9c553fe83c..83fc54a420e7c 100644 --- a/sql/rpl_mi.h +++ b/sql/rpl_mi.h @@ -99,6 +99,13 @@ class Domain_id_filter /// Serialize and store the ids from domain id lists into a group of fields. void store_ids(Field ***field); + + /* + User variable names for sending domain ID filter lists from slave to + master at connect time (MDEV-28213). + */ + static const LEX_CSTRING var_name_ignore; + static const LEX_CSTRING var_name_do; }; diff --git a/sql/slave.cc b/sql/slave.cc index c96c93a598ac9..dcfd2ebfea540 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1511,6 +1511,40 @@ bool is_network_error(uint errorno) 2 transient network problem, the caller should try to reconnect */ + +/* + Build a SET @var_name='id1,id2,...' query string from a DYNAMIC_ARRAY + of ulong domain IDs. 
+ + @param[in,out] query_str String to write the query into (reset first) + @param[in] ids Array of ulong domain IDs + @param[in] var_name User variable name (without '@') + + @retval false success + @retval true out of memory (an append failed; query_str is then incomplete) +*/ +static bool +build_domain_ids_query(String *query_str, const DYNAMIC_ARRAY *ids, + const char *var_name) +{ + query_str->length(0); + if (query_str->append(STRING_WITH_LEN("SET @"), system_charset_info) || + query_str->append(var_name, strlen(var_name), system_charset_info) || + query_str->append(STRING_WITH_LEN("='"), system_charset_info)) + return true; + for (uint i= 0; i < ids->elements; i++) + { + ulong domain_id; + get_dynamic((DYNAMIC_ARRAY *) ids, (void *) &domain_id, i); + if (i > 0 && query_str->append(',')) /* separator before every ID but the first */ + return true; + if (query_str->append_ulonglong(domain_id)) /* do not send a truncated list */ + return true; + } + return query_str->append(STRING_WITH_LEN("'"), system_charset_info); /* closing quote */ +} + + static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi) { char err_buff[MAX_SLAVE_ERRMSG], err_buff2[MAX_SLAVE_ERRMSG]; @@ -2412,6 +2446,75 @@ when it try to get the value of TIME_ZONE global variable from master."; } } } + + /* + Send the slave's domain ID filter to the master, so it can skip GTID + state validation for domains the slave doesn't care about. See + MDEV-28213. + + Only one of IGNORE_DOMAIN_IDS or DO_DOMAIN_IDS can be active at a + time, so we determine which is configured and send only that one. + + This is done as a user variable so that older masters that don't know + about it simply ignore it (backwards compatible). 
+ */ + { + const DYNAMIC_ARRAY *ids= NULL; + const LEX_CSTRING *var_name= NULL; + + if (mi->domain_id_filter.m_domain_ids[ + Domain_id_filter::IGNORE_DOMAIN_IDS].elements > 0) + { + ids= &mi->domain_id_filter.m_domain_ids[ + Domain_id_filter::IGNORE_DOMAIN_IDS]; + var_name= &Domain_id_filter::var_name_ignore; + } + else if (mi->domain_id_filter.m_domain_ids[ + Domain_id_filter::DO_DOMAIN_IDS].elements > 0) + { + ids= &mi->domain_id_filter.m_domain_ids[ + Domain_id_filter::DO_DOMAIN_IDS]; + var_name= &Domain_id_filter::var_name_do; + } + + if (ids) + { + if (build_domain_ids_query(&query_str, ids, var_name->str)) + { + err_code= ER_OUTOFMEMORY; + errmsg= "The slave I/O thread stops because a fatal out-of-memory " + "error is encountered when it tries to set @"; + sprintf(err_buff, "%s%.*s. Error: Out of memory", + errmsg, (int) var_name->length, var_name->str); + goto err; + } + + rc= mysql_real_query(mysql, query_str.ptr(), query_str.length()); + if (unlikely(rc)) + { + if (check_io_slave_killed(mi, NULL)) + goto slave_killed_err; + err_code= mysql_errno(mysql); + if (is_network_error(err_code)) + { + mi->report(ERROR_LEVEL, err_code, NULL, + "Setting @%.*s failed with error: %s", + (int) var_name->length, var_name->str, + mysql_error(mysql)); + goto network_err; + } + else + { + errmsg= "The slave I/O thread stops because a fatal error is " + "encountered when it tries to set @"; + sprintf(err_buff, "%s%.*s. 
Error: %s", + errmsg, (int) var_name->length, var_name->str, + mysql_error(mysql)); + goto err; + } + } + } + } } else { diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index c4c3e7d338b39..40bb627066f0e 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -195,7 +195,10 @@ struct binlog_send_info { bzero(&error_gtid, sizeof(error_gtid)); until_binlog_state.init(); } - ~binlog_send_info() { delete engine_binlog_reader; } + ~binlog_send_info() + { + delete engine_binlog_reader; + } }; // prototype @@ -1064,6 +1067,84 @@ get_slave_gtid_until_before_gtids(THD *thd) } +/* + Retrieve a user variable value as a string. + + Looks up the specified user variable in the THD's user_vars hash and + writes its string representation into out_str. + + @param[in] thd Current thread + @param[in] name Name of the user variable (without '@') + @param[in] name_len Length of the name + @param[out] out_str Output string + + @retval false Variable not found or NULL + @retval true Success (value written to out_str) +*/ +static bool +get_user_var_string(THD *thd, const char *name, size_t name_len, + String *out_str) +{ + bool null_value; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, (uchar*) name, + name_len); + return entry && entry->val_str(&null_value, out_str, 0) && !null_value; +} + + +/* + Parse a comma-separated list of domain IDs from a string and load them + into a DYNAMIC_ARRAY of uint32 elements. Used to populate a Domain_id_filter + from the slave's user variables sent before COM_BINLOG_DUMP. 
+ + @retval false success (every parsed ID was appended to ids; an empty string appends nothing) + @retval true error (parse error or out of memory) +*/ +static bool +load_domain_ids_from_string(const char *str, size_t len, DYNAMIC_ARRAY *ids) +{ + const char *p= str; + const char *end= str + len; /* one past the last input byte */ + + while (p < end) + { + /* Skip leading spaces (only ' ' is treated as whitespace) */ + while (p < end && *p == ' ') + p++; + if (p >= end) + break; + + /* Parse one domain ID using MariaDB's internal function for consistency */ + char *q= (char *) end; /* NOTE(review): presumably my_strtoll10() reads this as the parse limit and rewrites it to the first unparsed char - confirm */ + int err= 0; + uint64 v= (uint64) my_strtoll10(p, &q, &err); + if (err != 0 || v > (uint32) 0xffffffff || q == p) + return true; /* Parse error or overflow */ + /* + Domain_id_filter's DYNAMIC_ARRAY stores ulong elements (matching + do_filter's argument type), so cast here after 32-bit range validation. + */ + ulong domain_id= (ulong) v; + if (insert_dynamic(ids, (uchar *) &domain_id)) + return true; /* Out of memory */ + p= q; /* continue right after the parsed number */ + + /* Skip whitespace after number */ + while (p < end && *p == ' ') + p++; + + if (p >= end) + break; /* input ended right after a number */ + if (*p != ',') + return true; /* Parse error: expected a comma */ + p++; /* step over the comma; a trailing comma ends the loop and is accepted */ + } + + return false; +} + + /* Function prepares and sends repliation heartbeat event. @@ -1263,6 +1344,11 @@ get_binlog_list(MEM_ROOT *memroot, bool reverse= true, Gtid_list_log_event where D is not present in the requested slave state at all. Since if D is not in requested slave state, it means that slave needs to start at the very first GTID in domain D. + + The filter parameter (MDEV-28213) allows domains that the slave has + configured to ignore to be skipped during this check. Without this, a slave + that ignores a domain would fail to connect if the master's oldest binlog + references that domain but the slave has no position for it. 
*/ static bool contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev) @@ -1275,6 +1361,16 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev) const rpl_gtid *gtid= st->find(gl_domain_id); if (!gtid) { + /* + If the slave is configured to ignore this domain, skip the check. + The slave doesn't need events from this domain at all (MDEV-28213). + */ + if (st->domain_filter) + { + st->domain_filter->do_filter(gl_domain_id); + if (st->domain_filter->is_group_filtered()) + continue; + } /* The slave needs to start from the very beginning of this domain, which is in an earlier binlog file. So we need to search back further. @@ -1284,6 +1380,16 @@ contains_all_slave_gtid(slave_connection_state *st, Gtid_list_log_event *glev) if (gtid->server_id == glev->list[i].server_id && gtid->seq_no <= glev->list[i].seq_no) { + /* + If the slave is configured to ignore this domain, skip the check + even if the slave's position is behind the master's (MDEV-28213). + */ + if (st->domain_filter) + { + st->domain_filter->do_filter(gl_domain_id); + if (st->domain_filter->is_group_filtered()) + continue; + } /* The slave needs to start after gtid, but it is contained in an earlier binlog file. So we need to search back further, unless it was the very @@ -1365,6 +1471,20 @@ check_slave_start_position(binlog_send_info *info, const char **errormsg, rpl_gtid master_gtid; rpl_gtid master_replication_gtid; rpl_gtid start_gtid; + + /* + If the slave has configured this domain to be ignored (MDEV-28213), + skip validation for it entirely. The slave doesn't care about this + domain's events so there's no point requiring the master to have + the right binlog position for it. 
+ */ + if (st->domain_filter) + { + st->domain_filter->do_filter(slave_gtid->domain_id); + if (st->domain_filter->is_group_filtered()) + continue; + } + bool start_at_own_slave_pos= rpl_global_gtid_slave_state->domain_to_gtid(slave_gtid->domain_id, &master_replication_gtid) && @@ -1568,6 +1688,11 @@ gtid_check_binlog_file(slave_connection_state *state, Try to lookup the GTID position in the gtid index. If that doesn't work, read the Gtid_list_log_event at the start of the binlog file to get the binlog state. + + When the slave has configured IGNORE_DOMAIN_IDS or DO_DOMAIN_IDS + (MDEV-28213), the Domain_id_filter stored in state->domain_filter is used + by is_before_pos() and contains_all_slave_gtid() so that domains the slave + does not care about are skipped during the search. */ if (normalize_binlog_name(buf, list->name.str, false)) { @@ -1769,7 +1894,19 @@ gtid_find_binlog_pos(slave_connection_state *state, char *out_name, their UNTIL condition. */ for (i= 0; i < count; ++i) + { + /* + Skip filtered domains (MDEV-28213): the slave does not track + these, so found_pos_check_gtid() would hit DBUG_ASSERT(0). + */ + if (state->domain_filter) + { + state->domain_filter->do_filter(gtids[i].domain_id); + if (state->domain_filter->is_group_filtered()) + continue; + } found_pos_check_gtid(&(gtids[i]), state, until_gtid_state); + } } goto end; @@ -1807,6 +1944,16 @@ gtid_find_engine_pos(binlog_send_info *info) return gtid_too_old_errmsg; until_binlog_state->iterate( [pos, until_gtid_pos] (const rpl_gtid *gtid) -> bool { + /* + Skip filtered domains (MDEV-28213): the slave does not track these, + so found_pos_check_gtid() would hit DBUG_ASSERT(0). 
+ */ + if (pos->domain_filter) + { + pos->domain_filter->do_filter(gtid->domain_id); + if (pos->domain_filter->is_group_filtered()) + return false; + } found_pos_check_gtid(gtid, pos, until_gtid_pos); return false; }); @@ -2585,6 +2732,8 @@ static int init_binlog_sender(binlog_send_info *info, String connect_gtid_state(str_buf, sizeof(str_buf), system_charset_info); char str_buf2[128]; String slave_until_gtid_str(str_buf2, sizeof(str_buf2), system_charset_info); + char str_buf3[128]; + String domain_ids_str(str_buf3, sizeof(str_buf3), system_charset_info); connect_gtid_state.length(0); if (opt_binlog_engine_hton && @@ -2621,6 +2770,56 @@ static int init_binlog_sender(binlog_send_info *info, info->until_gtid_state= &info->until_gtid_state_obj; info->is_until_before_gtids= get_slave_gtid_until_before_gtids(thd); } + /* + Read the slave's domain ID filter list, if sent (MDEV-28213). + Lazy-initialized: the Domain_id_filter is only allocated if the slave + actually sends IGNORE_DOMAIN_IDS or DO_DOMAIN_IDS. Older slaves won't + send these, so the filter stays NULL and all domains are validated. + + Only one of IGNORE_DOMAIN_IDS or DO_DOMAIN_IDS can be active at a time, + so we check for the configured one and handle it directly. 
+ */ + { + const LEX_CSTRING *var_name= NULL; + int list_type; + + if (get_user_var_string(thd, Domain_id_filter::var_name_ignore.str, + Domain_id_filter::var_name_ignore.length, + &domain_ids_str)) + { + var_name= &Domain_id_filter::var_name_ignore; + list_type= Domain_id_filter::IGNORE_DOMAIN_IDS; + } + else if (get_user_var_string(thd, Domain_id_filter::var_name_do.str, + Domain_id_filter::var_name_do.length, + &domain_ids_str)) + { + var_name= &Domain_id_filter::var_name_do; + list_type= Domain_id_filter::DO_DOMAIN_IDS; + } + + if (var_name) + { + info->gtid_state.domain_filter= new Domain_id_filter(); + if (!info->gtid_state.domain_filter) + { + info->errmsg= "Out of memory allocating domain ID filter"; + info->error= ER_OUTOFMEMORY; + return 1; + } + DYNAMIC_ARRAY *ids= + &info->gtid_state.domain_filter->m_domain_ids[list_type]; + if (load_domain_ids_from_string(domain_ids_str.ptr(), + domain_ids_str.length(), ids)) + { + info->errmsg= "Out of memory or malformed slave request when " + "obtaining domain ID filter"; + info->error= ER_UNKNOWN_ERROR; + return 1; + } + sort_dynamic(ids, change_master_id_cmp); + } + } } else if (opt_binlog_engine_hton) {