Skip to content

Commit

Permalink
swim: be ready to idle round steps when net is slow
Browse files Browse the repository at this point in the history
First of all, the problem in a nutshell was that ev_timer with
non-zero 'repeat' field in fact is a ev_periodic. It is restarted
*automatically*, even if a user does not write ev_timer_again()
nor ev_timer_start().

This led to a situation, that a round message send is scheduled,
and next round step timer alarm happens before the message is
actually sent. It, in turn, led to an assertion on attempt to
schedule a task twice.

This patch fixes the swim test harness to behave like ev_timer
with 'repeat' > 0, and on first idle round step stops the timer -
it will be restarted once the currently hanging task will be
finally sent.

Follow up #3234
  • Loading branch information
Gerold103 committed May 22, 2019
1 parent 5001ecd commit a3919d8
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 5 deletions.
20 changes: 19 additions & 1 deletion src/lib/swim/swim.c
Expand Up @@ -1077,15 +1077,29 @@ swim_begin_step(struct ev_loop *loop, struct ev_timer *t, int events)
(void) events;
(void) loop;
struct swim *swim = (struct swim *) t->data;
/*
* There are possible false-positive wakeups. They can
* appear, when a round task was scheduled, but event
* loop was too busy to send the task, and the timer
* alarms again. In such a case stop it - it makes no
* sense to waste time on idle wakeups. Completion
* callback will restart the timer.
*/
if (swim_task_is_scheduled(&swim->round_step_task)) {
swim_ev_timer_stop(loop, t);
return;
}
if (! rlist_empty(&swim->round_queue))
say_verbose("SWIM %d: continue the round", swim_fd(swim));
else
swim_new_round(swim);
/*
* Possibly empty, if no members but self are specified.
*/
if (rlist_empty(&swim->round_queue))
if (rlist_empty(&swim->round_queue)) {
swim_ev_timer_stop(loop, t);
return;
}
swim_encode_round_msg(swim);
struct swim_member *m =
rlist_first_entry(&swim->round_queue, struct swim_member,
Expand All @@ -1104,6 +1118,10 @@ swim_complete_step(struct swim_task *task,
(void) rc;
(void) task;
struct swim *swim = swim_by_scheduler(scheduler);
/*
* It could be stopped by the step begin function, if the
* sending was too long.
*/
swim_ev_timer_again(loop(), &swim->round_tick);
/*
* It is possible that the original member was deleted
Expand Down
20 changes: 19 additions & 1 deletion test/unit/swim.c
Expand Up @@ -933,10 +933,27 @@ swim_test_encryption(void)
swim_finish_test();
}

static void
swim_test_slow_net(void)
{
swim_start_test(0);
struct swim_cluster *cluster = swim_cluster_new(2);
swim_cluster_interconnect(cluster, 0, 1);
swim_cluster_block_io(cluster, 0);
swim_cluster_block_io(cluster, 1);

note("slow network leads to idle round steps, they should not produce "\
"a new message");
swim_run_for(5);

swim_cluster_delete(cluster);
swim_finish_test();
}

static int
main_f(va_list ap)
{
swim_start_test(19);
swim_start_test(20);

(void) ap;
swim_test_ev_init();
Expand All @@ -961,6 +978,7 @@ main_f(va_list ap)
swim_test_payload_refutation();
swim_test_indirect_ping();
swim_test_encryption();
swim_test_slow_net();

swim_test_transport_free();
swim_test_ev_free();
Expand Down
7 changes: 6 additions & 1 deletion test/unit/swim.result
@@ -1,5 +1,5 @@
*** main_f ***
1..19
1..20
*** swim_test_one_link ***
1..6
ok 1 - no rounds - no fullmesh
Expand Down Expand Up @@ -195,4 +195,9 @@ ok 18 - subtests
ok 3 - cluster works after encryption has been disabled
ok 19 - subtests
*** swim_test_encryption: done ***
*** swim_test_slow_net ***
1..0
# slow network leads to idle round steps, they should not produce a new message
ok 20 - subtests
*** swim_test_slow_net: done ***
*** main_f: done ***
10 changes: 8 additions & 2 deletions test/unit/swim_test_ev.c
Expand Up @@ -177,18 +177,24 @@ swim_timer_event_delete(struct swim_event *e)
free(te);
}

/** Create a new timer event. */
static void
swim_timer_event_new(struct ev_watcher *watcher, double delay);

/** Process a timer event and delete it. */
static void
swim_timer_event_process(struct swim_event *e, struct ev_loop *loop)
{
assert(e->type == SWIM_EVENT_TIMER);
struct ev_watcher *w = ((struct swim_timer_event *) e)->watcher;
struct ev_timer *t = (struct ev_timer *) w;
swim_timer_event_delete(e);
((struct ev_timer *) w)->at = 0;
t->at = 0;
if (t->repeat > 0)
swim_timer_event_new(w, t->repeat);
ev_invoke(loop, w, EV_TIMER);
}

/** Create a new timer event. */
static void
swim_timer_event_new(struct ev_watcher *watcher, double delay)
{
Expand Down

0 comments on commit a3919d8

Please sign in to comment.