Skip to content

Commit ead62d3

Browse files
committed
swim: introduce "suspected" status
Now a member dies "gradually". After some failed pings it is declared as suspected. After more failed pings it is finally dead. New members in a config are declared as suspected because the instance cannot be sure whether they are alive or not. Follow up #3234
1 parent 4e95033 commit ead62d3

File tree

1 file changed

+23
-3
lines changed

1 file changed

+23
-3
lines changed

src/lib/swim/swim.c

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,12 @@ enum swim_member_status {
156156
* members table.
157157
*/
158158
MEMBER_ALIVE = 0,
159+
/**
160+
* If a member has not responded to a ping, it is declared
161+
* as suspected to be dead. After more failed pings it
162+
* is finally dead.
163+
*/
164+
MEMBER_SUSPECTED,
159165
/**
160166
* The member is considered to be dead. It will disappear
161167
* from the membership, if it is not pinned.
@@ -166,6 +172,7 @@ enum swim_member_status {
166172

167173
static const char *swim_member_status_strs[] = {
168174
"alive",
175+
"suspected",
169176
"dead",
170177
};
171178

@@ -596,9 +603,15 @@ enum {
596603
ACK_TIMEOUT = 1,
597604
/**
598605
* If a member has not been responding to pings this
599-
* number of times, it is considered to be dead.
606+
* number of times, it is suspected to be dead. To confirm
607+
* the death it should fail more pings.
600608
*/
601-
NO_ACKS_TO_DEAD = 3,
609+
NO_ACKS_TO_SUSPECT = 2,
610+
/**
611+
* If a member is suspected to be dead, after this number
612+
* of failed pings its death is confirmed.
613+
*/
614+
NO_ACKS_TO_DEAD = NO_ACKS_TO_SUSPECT + 2,
602615
/**
603616
* If a not pinned member confirmed to be dead, it is
604617
* removed from the membership after at least this number
@@ -1010,6 +1023,9 @@ swim_check_acks(struct ev_loop *loop, struct ev_periodic *p, int events)
10101023
if (m->failed_pings >= NO_ACKS_TO_DEAD) {
10111024
m->status = MEMBER_DEAD;
10121025
swim_member_is_updated(m);
1026+
} else if (m->failed_pings >= NO_ACKS_TO_SUSPECT) {
1027+
m->status = MEMBER_SUSPECTED;
1028+
swim_member_is_updated(m);
10131029
}
10141030
swim_io_task_push(&m->ping_task);
10151031
rlist_del_entry(m, in_queue_wait_ack);
@@ -1489,7 +1505,11 @@ swim_cfg(const char **member_uris, int member_uri_count, const char *server_uri,
14891505
free(cfg);
14901506
for (int i = 0; i < new_cfg_size; ++i) {
14911507
new_cfg[i]->is_pinned = true;
1492-
new_cfg[i]->status = MEMBER_ALIVE;
1508+
/*
1509+
* Real status is unknown, so a new member
1510+
* cannot be assumed alive.
1511+
*/
1512+
new_cfg[i]->status = MEMBER_SUSPECTED;
14931513
}
14941514
cfg = new_cfg;
14951515
cfg_size = new_cfg_size;

0 commit comments

Comments
 (0)