From 49a6d6edabf5d7c1e25279ca98bba3233b6056da Mon Sep 17 00:00:00 2001 From: Martin/Geno Date: Sat, 19 Jan 2019 18:45:59 +0100 Subject: [PATCH] [TASK] add no-respondd --- config_example.toml | 6 ++++++ database/graphite/global.go | 1 + database/influxdb/global.go | 13 +++++++------ database/logging/file.go | 2 +- docs/docs_configuration.md | 22 +++++++++++++++++++++ runtime/node.go | 1 + runtime/nodes.go | 11 ++++++++++- runtime/nodes_config.go | 2 ++ runtime/nodes_ping.go | 38 +++++++++++++++++++++++++++++++++++++ runtime/nodes_ping_test.go | 38 +++++++++++++++++++++++++++++++++++++ runtime/stats.go | 16 ++++++++++------ runtime/stats_test.go | 2 ++ 12 files changed, 138 insertions(+), 14 deletions(-) create mode 100644 runtime/nodes_ping.go create mode 100644 runtime/nodes_ping_test.go diff --git a/config_example.toml b/config_example.toml index a5a3b530..1eedb6f0 100644 --- a/config_example.toml +++ b/config_example.toml @@ -55,6 +55,12 @@ save_interval = "5s" # Set node to offline if not seen within this period offline_after = "10m" +## Verify that a node is really down by pinging its last seen address +# send x pings to verify that the node is offline (to disable, set count < 1) +ping_count = 3 +# timeout for pinging a node +ping_timeout = "1s" + ## [[nodes.output.example]] # Each output format has its own config block and needs to be enabled by adding: diff --git a/database/graphite/global.go b/database/graphite/global.go index 62fff662..0cd2cf68 100644 --- a/database/graphite/global.go +++ b/database/graphite/global.go @@ -36,6 +36,7 @@ func (c *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, s func GlobalStatsFields(name string, stats *runtime.GlobalStats) []graphigo.Metric { return []graphigo.Metric{ {Name: name + ".nodes", Value: stats.Nodes}, + {Name: name + ".nodes.no_respondd", Value: stats.NodesNoRespondd}, {Name: name + ".gateways", Value: stats.Gateways}, {Name: name + ".clients.total", Value: stats.Clients}, {Name: name +
".clients.wifi", Value: stats.ClientsWifi}, diff --git a/database/influxdb/global.go b/database/influxdb/global.go index 6c4330eb..90b82b90 100644 --- a/database/influxdb/global.go +++ b/database/influxdb/global.go @@ -42,12 +42,13 @@ func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time // GlobalStatsFields returns fields for InfluxDB func GlobalStatsFields(stats *runtime.GlobalStats) map[string]interface{} { return map[string]interface{}{ - "nodes": stats.Nodes, - "gateways": stats.Gateways, - "clients.total": stats.Clients, - "clients.wifi": stats.ClientsWifi, - "clients.wifi24": stats.ClientsWifi24, - "clients.wifi5": stats.ClientsWifi5, + "nodes": stats.Nodes, + "nodes.no_respondd": stats.NodesNoRespondd, + "gateways": stats.Gateways, + "clients.total": stats.Clients, + "clients.wifi": stats.ClientsWifi, + "clients.wifi24": stats.ClientsWifi24, + "clients.wifi5": stats.ClientsWifi5, } } diff --git a/database/logging/file.go b/database/logging/file.go index c9745b0f..47686164 100644 --- a/database/logging/file.go +++ b/database/logging/file.go @@ -50,7 +50,7 @@ func (conn *Connection) InsertLink(link *runtime.Link, time time.Time) { } func (conn *Connection) InsertGlobals(stats *runtime.GlobalStats, time time.Time, site string, domain string) { - conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, ", clients: ", stats.Clients, " models: ", len(stats.Models)) + conn.log("InsertGlobals: [", time.String(), "] site: ", site, " domain: ", domain, ", nodes: ", stats.Nodes, " (no respondd: ", stats.NodesNoRespondd, "), clients: ", stats.Clients, " models: ", len(stats.Models)) } func (conn *Connection) PruneNodes(deleteAfter time.Duration) { diff --git a/docs/docs_configuration.md b/docs/docs_configuration.md index 4c5d4067..1291dd42 100644 --- a/docs/docs_configuration.md +++ b/docs/docs_configuration.md @@ -203,6 +203,8 @@ state_path = "/var/lib/yanic/state.json" prune_after = "7d" 
save_interval = "5s" offline_after = "10m" +ping_count = 3 +ping_timeout = "1s" ``` {% endmethod %} @@ -246,6 +248,26 @@ offline_after = "10m" ``` {% endmethod %} +### ping_count +{% method %} +Verify that a node is really down by pinging its last seen address. +Sends x pings to verify that the node is offline (to disable, set count < 1). +{% sample lang="toml" %} +```toml +ping_count = 3 +``` +{% endmethod %} + + +### ping_timeout +{% method %} +Timeout for pinging a node +{% sample lang="toml" %} +```toml +ping_timeout = "1s" +``` +{% endmethod %} + ## [[nodes.output.example]] {% method %} diff --git a/runtime/node.go b/runtime/node.go index 3e2059cb..820c1ee6 100644 --- a/runtime/node.go +++ b/runtime/node.go @@ -13,6 +13,7 @@ type Node struct { Firstseen jsontime.Time `json:"firstseen"` Lastseen jsontime.Time `json:"lastseen"` Online bool `json:"online"` + NoRespondd bool `json:"-"` Statistics *data.Statistics `json:"statistics"` Nodeinfo *data.NodeInfo `json:"nodeinfo"` Neighbours *data.Neighbours `json:"-"` diff --git a/runtime/nodes.go b/runtime/nodes.go index 91e8ac8d..4b41d428 100644 --- a/runtime/nodes.go +++ b/runtime/nodes.go @@ -176,7 +176,16 @@ func (nodes *Nodes) expire() { delete(nodes.List, id) } else if node.Lastseen.Before(offlineAfter) { // set to offline - node.Online = false + if nodes.config.PingCount > 0 && nodes.ping(node) { + node.Online = true + node.NoRespondd = true + + node.Statistics = nil + node.Neighbours = nil + } else { + node.Online = false + node.NoRespondd = false + } } } } diff --git a/runtime/nodes_config.go b/runtime/nodes_config.go index 6d9520ce..69dbf90f 100644 --- a/runtime/nodes_config.go +++ b/runtime/nodes_config.go @@ -7,5 +7,7 @@ type NodesConfig struct { SaveInterval duration.Duration `toml:"save_interval"` // Save nodes periodically OfflineAfter duration.Duration `toml:"offline_after"` // Set node to offline if not seen within this period PruneAfter duration.Duration `toml:"prune_after"` // Remove nodes after n days of
inactivity + PingCount int `toml:"ping_count"` // send x pings to verify if node is offline (for disable count < 1) + PingTimeout duration.Duration `toml:"ping_timeout"` // timeout of sending ping to a node Output map[string]interface{} } diff --git a/runtime/nodes_ping.go b/runtime/nodes_ping.go new file mode 100644 index 00000000..d5f7e535 --- /dev/null +++ b/runtime/nodes_ping.go @@ -0,0 +1,38 @@ +package runtime + +import ( + "github.com/bdlm/log" + "github.com/sparrc/go-ping" +) + +func (nodes *Nodes) ping(node *Node) bool { + logNode := log.WithField("node_id", "unknown") + if node.Nodeinfo != nil { + logNode = logNode.WithField("node_id", node.Nodeinfo.NodeID) + } + if node.Address == nil { + logNode.Debug("error no address found") + return false + } + addr := node.Address.IP.String() + if node.Address.IP.IsLinkLocalUnicast() { + addr += "%" + node.Address.Zone + } + + logAddr := logNode.WithField("addr", addr) + + pinger, err := ping.NewPinger(addr) + if err != nil { + logAddr.Debugf("error during ping: %s", err) + return false + } + //pinger.SetPrivileged(true) + pinger.Count = nodes.config.PingCount + pinger.Timeout = nodes.config.PingTimeout.Duration + pinger.Run() // blocks until finished + stats := pinger.Statistics() + logAddr.WithFields(map[string]interface{}{ + "pkg_lost": stats.PacketLoss, + }).Debug("pong") + return stats.PacketLoss < 100 +} diff --git a/runtime/nodes_ping_test.go b/runtime/nodes_ping_test.go new file mode 100644 index 00000000..170f4d14 --- /dev/null +++ b/runtime/nodes_ping_test.go @@ -0,0 +1,38 @@ +package runtime + +import ( + "net" + "testing" + "time" + + "github.com/bdlm/log" + "github.com/stretchr/testify/assert" + + "github.com/FreifunkBremen/yanic/data" +) + +func TestPing(t *testing.T) { + log.SetLevel(log.DebugLevel) + + assert := assert.New(t) + config := &NodesConfig{ + PingCount: 1, + } + config.OfflineAfter.Duration = time.Minute * 10 + // to get default (100%) path of testing + // config.PruneAfter.Duration = 
time.Hour * 24 * 6 + nodes := &Nodes{ + config: config, + List: make(map[string]*Node), + ifaceToNodeID: make(map[string]string), + } + + node := nodes.Update("expire", &data.ResponseData{NodeInfo: &data.NodeInfo{NodeID: "nodeID-Lola"}}) + node.Address = &net.UDPAddr{Zone: "bat0"} + // error during ping + assert.False(nodes.ping(node)) + + node.Address.IP = net.ParseIP("fe80::1") + // error during ping + assert.False(nodes.ping(node)) +} diff --git a/runtime/stats.go b/runtime/stats.go index 145d8bb2..bdb7d292 100644 --- a/runtime/stats.go +++ b/runtime/stats.go @@ -11,12 +11,13 @@ type CounterMap map[string]uint32 // GlobalStats struct type GlobalStats struct { - Clients uint32 - ClientsWifi uint32 - ClientsWifi24 uint32 - ClientsWifi5 uint32 - Gateways uint32 - Nodes uint32 + Clients uint32 + ClientsWifi uint32 + ClientsWifi24 uint32 + ClientsWifi5 uint32 + Gateways uint32 + Nodes uint32 + NodesNoRespondd uint32 Firmwares CounterMap Models CounterMap @@ -81,6 +82,9 @@ func (s *GlobalStats) Add(node *Node) { s.ClientsWifi5 += stats.Clients.Wifi5 s.ClientsWifi += stats.Clients.Wifi } + if node.NoRespondd { + s.NodesNoRespondd++ + } if node.IsGateway() { s.Gateways++ } diff --git a/runtime/stats_test.go b/runtime/stats_test.go index 93363e86..7e9dfe1b 100644 --- a/runtime/stats_test.go +++ b/runtime/stats_test.go @@ -22,6 +22,7 @@ func TestGlobalStats(t *testing.T) { //check GLOBAL_SITE stats assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Gateways) assert.EqualValues(3, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Nodes) + assert.EqualValues(1, stats[GLOBAL_SITE][GLOBAL_DOMAIN].NodesNoRespondd) assert.EqualValues(25, stats[GLOBAL_SITE][GLOBAL_DOMAIN].Clients) // check models @@ -99,6 +100,7 @@ func createTestNodes() *Nodes { nodes.AddNode(&Node{ Online: true, + NoRespondd: true, Statistics: &data.Statistics{ Clients: data.Clients{ Total: 2,