Skip to content

Commit

Permalink
[Discovery] immediately start Master|Node fault detection pinging
Browse files Browse the repository at this point in the history
After a node joins the cluster, it starts pinging the master to verify its health. Before, the cluster join request was processed asynchronously and we had to allow some time for it to complete. With #6480 we changed this to wait for the join process to complete on the master. We can therefore start pinging immediately for fast detection of failures. A similar change can be made to the Node fault detection from the master side.

Closes #6706
  • Loading branch information
bleskes committed Jul 12, 2014
1 parent 6b8bf2a commit 432042f
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
Expand Up @@ -153,8 +153,9 @@ private void innerStart(final DiscoveryNode masterNode) {
masterPinger.stop();
}
this.masterPinger = new MasterPinger();
// start the ping process
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);

// we schedule with a 0 time value so that the pinger runs on the pool, just as it will on later runs
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
}

public void stop(String reason) {
Expand Down Expand Up @@ -198,7 +199,8 @@ private void handleTransportDisconnect(DiscoveryNode node) {
masterPinger.stop();
}
this.masterPinger = new MasterPinger();
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
// we schedule with a 0 time value so that the pinger runs on the pool, just as it will on later runs
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
} catch (Exception e) {
logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");
Expand Down
Expand Up @@ -119,7 +119,8 @@ public void updateNodes(DiscoveryNodes nodes) {
}
if (!nodesFD.containsKey(newNode)) {
nodesFD.put(newNode, new NodeFD());
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode));
// we schedule with a 0 time value so that the pinger runs on the pool, just as it will on later runs
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode));
}
}
for (DiscoveryNode removedNode : delta.removedNodes()) {
Expand Down Expand Up @@ -165,7 +166,8 @@ private void handleTransportDisconnect(DiscoveryNode node) {
try {
transportService.connectToNode(node);
nodesFD.put(node, new NodeFD());
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node));
// we schedule with a 0 time value so that the pinger runs on the pool, just as it will on later runs
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node));
} catch (Exception e) {
logger.trace("[node ] [{}] transport disconnected (with verified connect)", node);
notifyNodeFailure(node, "transport disconnected (with verified connect)");
Expand Down

0 comments on commit 432042f

Please sign in to comment.