Skip to content
/ server Public

Commit 080d92a

Browse files
plampio authored and
janlindstrom committed
MDEV-31517 Wrong variable name in the configuration leads Galera to
think SST/IST failed, at next restart will request a full SST. This patch fixes an unwanted behavior of a Galera cluster node when Server startup fails because of an error in the configuration file: after the failure, a full SST is requested at the next Server startup even though a full SST is not needed (MDEV-31517). If Server startup fails because of a configuration error, this patch ensures that the Galera state of the failing node remains unchanged. This avoids a full SST at the next Server restart. This fix consists of three patches for the following components: 1) Server, 2) WSREP library, 3) Galera.
1 parent c331d53 commit 080d92a

File tree

4 files changed

+164
-2
lines changed

4 files changed

+164
-2
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
connection node_2;
2+
connection node_1;
3+
connection node_2;
4+
CALL mtr.add_suppression("unknown variable 'non_existing_variable=ON'");
5+
CALL mtr.add_suppression("Aborting");
6+
CALL mtr.add_suppression("sst_received failed: State wait was interrupted");
7+
CALL mtr.add_suppression("State transfer interrupted, shutting down gracefully");
8+
connection node_1;
9+
CREATE TABLE t(i INT NOT NULL PRIMARY KEY) ENGINE INNODB;
10+
INSERT INTO t VALUES(1);
11+
connection node_2;
12+
connection node_1;
13+
connection node_2;
14+
connection node_1;
15+
connection node_2;
16+
connection node_1;
17+
connection node_2;
18+
Starting server ...
19+
Starting server ...
20+
SET GLOBAL wsrep_mode = DEFAULT;
21+
connection node_1;
22+
DROP TABLE t;
23+
disconnect node_2;
24+
disconnect node_1;
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#
2+
# Test for MDEV-31517: Wrong variable name in the configuration leads
3+
# Galera to think SST/IST failed, at next restart will request a full
4+
# SST.
5+
#
6+
# To reproduce:
7+
#
8+
# 1. Start Galera cluster
9+
# 2. Stop a Node
10+
# 3. Start the node
11+
# 4. Stop a Node
12+
# 5. Add non_existing_variable=ON in the config
13+
# 6. Start the node, this will fail
14+
# 7. Remove non_existing_variable=ON from the config file
15+
# 8. Restart the server
16+
# 9. Observe a full SST happening
17+
#
18+
# This test checks that an IST takes place at Step 9 instead of a full SST.
19+
20+
# Step 1: Start Galera cluster
21+
--source include/galera_cluster.inc
22+
--source include/have_mariabackup.inc
23+
--echo # Make sure that the test is operating on the right version of galera library.
24+
--let $galera_version=26.4.25
25+
source ../wsrep/include/check_galera_version.inc;
26+
27+
# Suppress expected errors and warnings:
28+
--connection node_2
29+
CALL mtr.add_suppression("unknown variable 'non_existing_variable=ON'");
30+
CALL mtr.add_suppression("Aborting");
31+
CALL mtr.add_suppression("sst_received failed: State wait was interrupted");
32+
CALL mtr.add_suppression("State transfer interrupted, shutting down gracefully");
33+
34+
# Count the number of "SST completed" messages in the log file before
35+
# and after testing. To do this we need to save original log file
36+
# before testing:
37+
#
38+
--let TEST_LOG=$MYSQLTEST_VARDIR/log/mysqld.2.err
39+
# Delete a leftover "<TEST_LOG>.copy" file, if one exists. The log path is
# taken from the TEST_LOG environment variable; the script dies when it is
# not set.
--perl
40+
use strict;
41+
my $test_log=$ENV{'TEST_LOG'} or die "TEST_LOG not set";
42+
# The snapshot path is the log path with a '.copy' suffix appended.
my $test_log_copy=$test_log . '.copy';
43+
# Presumably done so that the --copy_file command following this block does
# not find an already existing destination file -- TODO confirm.
if (-e $test_log_copy) {
44+
unlink $test_log_copy;
45+
}
46+
EOF
47+
--copy_file $TEST_LOG $TEST_LOG.copy
48+
49+
--connection node_1
50+
CREATE TABLE t(i INT NOT NULL PRIMARY KEY) ENGINE INNODB;
51+
INSERT INTO t VALUES(1);
52+
53+
# Step 2: Stop node 2
54+
--connection node_2
55+
--source include/shutdown_mysqld.inc
56+
57+
--connection node_1
58+
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'
59+
--source include/wait_condition.inc
60+
61+
# Step 3: Start node 2
62+
--connection node_2
63+
--source include/start_mysqld.inc
64+
65+
--connection node_1
66+
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'
67+
--source include/wait_condition.inc
68+
69+
# Step 4: Stop node 2
70+
--connection node_2
71+
let $MYSQLD_DATADIR= `SELECT @@datadir`;
72+
--source include/shutdown_mysqld.inc
73+
74+
--connection node_1
75+
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'
76+
--source include/wait_condition.inc
77+
78+
# Step 5: Add non_existing_variable=ON in the config
79+
# Save the current my.cnf as my.cnf-orig (it is copied back in Step 7), then
# append a [mysqld.2] group containing the unknown option to my.cnf.
# NOTE(review): nothing in this script removes my.cnf-orig afterwards --
# confirm whether cleanup is expected.
--exec cp $MYSQLTEST_VARDIR/my.cnf $MYSQLTEST_VARDIR/my.cnf-orig
80+
--exec echo '[mysqld.2]' >> $MYSQLTEST_VARDIR/my.cnf
81+
--exec echo 'non_existing_variable=ON' >> $MYSQLTEST_VARDIR/my.cnf
82+
83+
# Step 6: start the stopped node, this will fail
84+
--connection node_2
85+
# Keep a copy of grastate.dat as it is before the failing start attempt.
# NOTE(review): grastate.dat_before and grastate.dat_after are created here
# but are neither compared nor removed anywhere in this script -- confirm
# whether a comparison of the two files was intended.
--exec cp -p $MYSQLD_DATADIR/grastate.dat $MYSQLD_DATADIR/grastate.dat_before
86+
--echo Starting server ...
87+
# NOTE(review): the status checked by "--error 1" is that of the whole
# pipeline below; in a typical shell that is grep's exit status (1 when
# nothing on stdout matches), not mysqld's -- confirm this is the intended
# check.
--error 1
88+
--exec $MYSQLD --defaults-group-suffix=.2 --defaults-file=$MYSQLTEST_VARDIR/my.cnf | grep 'non_existing_variable'
89+
# Keep a copy of grastate.dat as it is after the failed start attempt.
--exec cp -p $MYSQLD_DATADIR/grastate.dat $MYSQLD_DATADIR/grastate.dat_after
90+
91+
# Step 7: remove the wrong variable in the config file
92+
# Done by copying the saved my.cnf-orig back over my.cnf.
--exec cp $MYSQLTEST_VARDIR/my.cnf-orig $MYSQLTEST_VARDIR/my.cnf
93+
94+
# Step 8: Start the node
95+
--echo Starting server ...
96+
let $restart_noprint=2;
97+
--source include/start_mysqld.inc
98+
99+
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
100+
--source include/wait_condition.inc
101+
102+
--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready';
103+
--source include/wait_condition.inc
104+
105+
# cleanup
106+
SET GLOBAL wsrep_mode = DEFAULT;
107+
108+
--connection node_1
109+
DROP TABLE t;
110+
111+
# Count the number of "SST completed" messages in the log file during
112+
# test phase - to print the error message if the number of such
113+
# messages in log file increased at the end of the test:
114+
#
115+
# Compare the number of lines matching "SST completed" in the saved log
# snapshot ("<TEST_LOG>.copy") with the number in the current log, and print
# a message when the two counts differ.
--perl
116+
use strict;
117+
my $test_log=$ENV{'TEST_LOG'} or die "TEST_LOG not set";
118+
my $test_log_copy=$test_log . '.copy';
119+
open(FILE, $test_log_copy) or die("Unable to open $test_log_copy: $!\n");
120+
# grep evaluated in scalar context yields the number of matching lines;
# the match is case-insensitive (/i).
my $initial=grep(/SST completed/gi,<FILE>);
121+
close(FILE);
122+
open(FILE, $test_log) or die("Unable to open $test_log: $!\n");
123+
# Same count, taken from the log as it is at the end of the test.
my $final=grep(/SST completed/gi,<FILE>);
124+
close(FILE);
125+
# Nothing is printed when both counts are equal.
if ($final != $initial) {
126+
my $diff=$final-$initial;
127+
print("Full WSREP SST performed $diff times.\n");
128+
}
129+
EOF
130+
--remove_file $TEST_LOG.copy
131+
132+
--source include/galera_end.inc

sql/mysqld.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5361,8 +5361,14 @@ static int init_server_components()
53615361
#endif
53625362

53635363
if ((ho_error= handle_options(&remaining_argc, &remaining_argv, removed_opts,
5364-
mysqld_get_one_option)))
5364+
mysqld_get_one_option))) {
5365+
#ifdef WITH_WSREP
5366+
Wsrep_server_state::instance().disable_node_reset();
5367+
#endif
5368+
53655369
unireg_abort(ho_error);
5370+
}
5371+
53665372
/* Add back the program name handle_options removes */
53675373
remaining_argc++;
53685374
remaining_argv--;

wsrep-lib

0 commit comments

Comments
 (0)