Skip to content

Commit

Permalink
MDEV-9083: Slave IO thread does not handle autoreconnect to restartin…
Browse files Browse the repository at this point in the history
…g Galera Cluster node

Cherry-picked commits from codership/mysql-wsrep.

MW-284: Slave I/O retry on ER_UNKNOWN_COM_ERROR

The slave would treat ER_UNKNOWN_COM_ERROR as a fatal error and stop.
The fix here is to treat it as a network error and rely on the
built-in mechanism to retry.

MW-284: Add an MTR test
  • Loading branch information
Nirbhay Choubey committed Jun 12, 2016
1 parent c0238be commit 868c2ce
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 0 deletions.
13 changes: 13 additions & 0 deletions mysql-test/suite/galera/r/MW-284.result
@@ -0,0 +1,13 @@
CREATE TABLE t1 (f1 INTEGER) ENGINE=InnoDB;
SET GLOBAL wsrep_provider_options='gmcast.isolate=1';
SET SESSION wsrep_on = OFF;
SET SESSION wsrep_on = ON;
START SLAVE;
include/wait_for_slave_param.inc [Slave_IO_Running]
SET GLOBAL wsrep_provider_options='gmcast.isolate=0';
include/wait_for_slave_to_start.inc
INSERT INTO t1 VALUES (1);
DROP TABLE t1;
STOP SLAVE;
RESET SLAVE ALL;
CALL mtr.add_suppression('failed registering on master');
1 change: 1 addition & 0 deletions mysql-test/suite/galera/t/MW-284.cnf
@@ -0,0 +1 @@
!include ../galera_2nodes_as_master.cnf
57 changes: 57 additions & 0 deletions mysql-test/suite/galera/t/MW-284.test
@@ -0,0 +1,57 @@
#
# MW-284 Slave I/O retry on ER_UNKNOWN_COM_ERROR
#
# Scenario: node_3 is configured as a replication slave of node_1. node_1 is
# isolated from the cluster until it reports 'non-Primary', the slave is
# started while node_1 is in that state, and then the isolation is lifted.
# The slave I/O thread must stay in 'Connecting' (retrying) rather than stop,
# and replication must resume once node_1 is reachable again.
#

--source include/galera_cluster.inc
--source include/have_innodb.inc

# Open a connection to node_3 and point it at node_1 as its master, retrying
# the connection every second. Query logging is switched off around the
# statement because it embeds $NODE_MYPORT_1 (presumably a run-specific port
# that must not appear in the recorded result - confirm).
--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3
--disable_query_log
--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_PORT=$NODE_MYPORT_1, MASTER_USER='root', MASTER_CONNECT_RETRY=1;
--enable_query_log

# Create the test table on node_1, then isolate node_1 from the rest of the
# cluster and wait until it reports a 'non-Primary' cluster status.
# NOTE(review): wsrep_on is turned OFF for the duration of the wait,
# presumably so the status query is not rejected on a non-Primary node -
# confirm.
--connection node_1
CREATE TABLE t1 (f1 INTEGER) ENGINE=InnoDB;
SET GLOBAL wsrep_provider_options='gmcast.isolate=1';
SET SESSION wsrep_on = OFF;
--let $wait_condition = SELECT VARIABLE_VALUE = 'non-Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'
--source include/wait_condition.inc
SET SESSION wsrep_on = ON;

# Start the slave while the master is isolated. The I/O thread is expected to
# report 'Connecting', i.e. it keeps retrying instead of stopping.
# NOTE(review): the 1 second sleep matches MASTER_CONNECT_RETRY=1 and
# presumably lets the I/O thread fail at least one attempt before the state
# is polled - confirm.
--connection node_3
START SLAVE;
--sleep 1
--let $slave_param= Slave_IO_Running
--let $slave_param_value= Connecting
--source include/wait_for_slave_param.inc

# Lift the isolation so node_1 can rejoin the cluster.
--connection node_1
SET GLOBAL wsrep_provider_options='gmcast.isolate=0';

# We expect the slave to reconnect and resume replication

--connection node_3
--source include/wait_for_slave_to_start.inc

# Write a row on the master ...
--connection node_1
INSERT INTO t1 VALUES (1);

# ... and wait on the slave until both the table and at least one row have
# been replicated.
--connection node_3
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) > 0 FROM t1
--source include/wait_condition.inc

# Cleanup

--connection node_1
DROP TABLE t1;

# Wait for the DROP to replicate before stopping the slave, so that t1 is not
# left behind on node_3.
--connection node_3
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'
--source include/wait_condition.inc

STOP SLAVE;
RESET SLAVE ALL;

# NOTE(review): presumably the slave logs 'failed registering on master' while
# node_1 is non-Primary; the suppression keeps that expected message from
# being reported as a test failure - confirm.
CALL mtr.add_suppression('failed registering on master');
4 changes: 4 additions & 0 deletions sql/slave.cc
Expand Up @@ -1361,6 +1361,10 @@ bool is_network_error(uint errorno)
errorno == ER_NET_READ_INTERRUPTED ||
errorno == ER_SERVER_SHUTDOWN)
return TRUE;
#ifdef WITH_WSREP
if (errorno == ER_UNKNOWN_COM_ERROR)
return TRUE;
#endif

return FALSE;
}
Expand Down

0 comments on commit 868c2ce

Please sign in to comment.