@@ -156,6 +156,12 @@ enum swim_member_status {
156
156
* members table.
157
157
*/
158
158
MEMBER_ALIVE = 0 ,
159
+ /**
160
+ * If a member has not responded to a ping, it is declared
161
+ * as suspected to be dead. After more failed pings it
162
+ * is finally declared dead.
163
+ */
164
+ MEMBER_SUSPECTED ,
159
165
/**
160
166
* The member is considered to be dead. It will disappear
161
167
* from the membership, if it is not pinned.
@@ -166,6 +172,7 @@ enum swim_member_status {
166
172
167
173
static const char * swim_member_status_strs [] = {
168
174
"alive" ,
175
+ "suspected" ,
169
176
"dead" ,
170
177
};
171
178
@@ -596,9 +603,15 @@ enum {
596
603
ACK_TIMEOUT = 1 ,
597
604
/**
598
605
* If a member has not been responding to pings this
599
- * number of times, it is considered to be dead.
606
+ * number of times, it is suspected to be dead. To confirm
607
+ * the death it should fail more pings.
600
608
*/
601
- NO_ACKS_TO_DEAD = 3 ,
609
+ NO_ACKS_TO_SUSPECT = 2 ,
610
+ /**
611
+ * A suspected member's death is confirmed once its failed
612
+ * pings counter reaches this value.
613
+ */
614
+ NO_ACKS_TO_DEAD = NO_ACKS_TO_SUSPECT + 2 ,
602
615
/**
603
616
* If a non-pinned member is confirmed to be dead, it is
604
617
* removed from the membership after at least this number
@@ -1010,6 +1023,9 @@ swim_check_acks(struct ev_loop *loop, struct ev_periodic *p, int events)
1010
1023
if (m -> failed_pings >= NO_ACKS_TO_DEAD ) {
1011
1024
m -> status = MEMBER_DEAD ;
1012
1025
swim_member_is_updated (m );
1026
+ } else if (m -> failed_pings >= NO_ACKS_TO_SUSPECT ) {
1027
+ m -> status = MEMBER_SUSPECTED ;
1028
+ swim_member_is_updated (m );
1013
1029
}
1014
1030
swim_io_task_push (& m -> ping_task );
1015
1031
rlist_del_entry (m , in_queue_wait_ack );
@@ -1489,7 +1505,11 @@ swim_cfg(const char **member_uris, int member_uri_count, const char *server_uri,
1489
1505
free (cfg );
1490
1506
for (int i = 0 ; i < new_cfg_size ; ++ i ) {
1491
1507
new_cfg [i ]-> is_pinned = true;
1492
- new_cfg [i ]-> status = MEMBER_ALIVE ;
1508
+ /*
1509
+ * Real status is unknown, so a new member
1510
+ * cannot be considered alive.
1511
+ */
1512
+ new_cfg [i ]-> status = MEMBER_SUSPECTED ;
1493
1513
}
1494
1514
cfg = new_cfg ;
1495
1515
cfg_size = new_cfg_size ;
0 commit comments