Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rename alert collector to ha_check and disable it #36

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 20 additions & 20 deletions collector/alert/alert.go → collector/ha_check/ha_check.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package alert
package ha_check

import (
"github.com/pkg/errors"
Expand All @@ -10,26 +10,26 @@ import (
"github.com/SUSE/sap_host_exporter/internal/sapcontrol"
)

func NewCollector(webService sapcontrol.WebService) (*alertCollector, error) {
func NewCollector(webService sapcontrol.WebService) (*checkCollector, error) {

c := &alertCollector{
collector.NewDefaultCollector("alert"),
c := &checkCollector{
collector.NewDefaultCollector("ha_check"),
webService,
}

c.SetDescriptor("ha_check", "High Availability system configuration and status checks", []string{"description", "category", "comment"})
c.SetDescriptor("ha_failover_active", "Whether or not High Availability Failover is active", nil)
c.SetDescriptor("config", "High Availability system configuration and status checks", []string{"description", "category", "comment"})
c.SetDescriptor("failover_active", "Whether or not High Availability Failover is active", nil)

return c, nil
}

type alertCollector struct {
type checkCollector struct {
collector.DefaultCollector
webService sapcontrol.WebService
}

func (c *alertCollector) Collect(ch chan<- prometheus.Metric) {
log.Debugln("Collecting Alert metrics")
func (c *checkCollector) Collect(ch chan<- prometheus.Metric) {
log.Debugln("Collecting Check metrics")

errs := collector.RecordConcurrently([]func(ch chan<- prometheus.Metric) error{
c.recordHAConfigChecks,
Expand All @@ -38,61 +38,61 @@ func (c *alertCollector) Collect(ch chan<- prometheus.Metric) {
}, ch)

for _, err := range errs {
log.Warnf("Alert Collector scrape failed: %s", err)
log.Warnf("Check Collector scrape failed: %s", err)
}
}

func (c *alertCollector) recordHAConfigChecks(ch chan<- prometheus.Metric) error {
func (c *checkCollector) recordHAConfigChecks(ch chan<- prometheus.Metric) error {
response, err := c.webService.HACheckConfig()
if err != nil {
return errors.Wrap(err, "SAPControl web service error")
}

err = c.recordHAChecks(response.Checks, ch)
err = c.recordConfigChecks(response.Checks, ch)
if err != nil {
return err
}

return nil
}

func (c *alertCollector) recordHAFailoverConfigChecks(ch chan<- prometheus.Metric) error {
func (c *checkCollector) recordHAFailoverConfigChecks(ch chan<- prometheus.Metric) error {
response, err := c.webService.HACheckFailoverConfig()

if err != nil {
return errors.Wrap(err, "SAPControl web service error")
}

err = c.recordHAChecks(response.Checks, ch)
err = c.recordConfigChecks(response.Checks, ch)
if err != nil {
return errors.Wrap(err, "could not record HACheck")
}

return nil
}

func (c *alertCollector) recordHAChecks(checks []*sapcontrol.HACheck, ch chan<- prometheus.Metric) error {
func (c *checkCollector) recordConfigChecks(checks []*sapcontrol.HACheck, ch chan<- prometheus.Metric) error {
for _, check := range checks {
err := c.recordHACheck(check, ch)
err := c.recordConfigCheck(check, ch)
if err != nil {
return err
}
}
return nil
}

func (c *alertCollector) recordHACheck(check *sapcontrol.HACheck, ch chan<- prometheus.Metric) error {
func (c *checkCollector) recordConfigCheck(check *sapcontrol.HACheck, ch chan<- prometheus.Metric) error {
stateCode, err := sapcontrol.HaVerificationStateToFloat(check.State)
category, err := sapcontrol.HaCheckCategoryToString(check.Category)
if err != nil {
return errors.Wrapf(err, "unable to process SAPControl HACheck data: %v", *check)
}
ch <- c.MakeGaugeMetric("ha_check", stateCode, check.Description, category, check.Comment)
ch <- c.MakeGaugeMetric("config", stateCode, check.Description, category, check.Comment)

return nil
}

func (c *alertCollector) recordHAFailoverActive(ch chan<- prometheus.Metric) error {
func (c *checkCollector) recordHAFailoverActive(ch chan<- prometheus.Metric) error {
response, err := c.webService.HAGetFailoverConfig()

if err != nil {
Expand All @@ -103,7 +103,7 @@ func (c *alertCollector) recordHAFailoverActive(ch chan<- prometheus.Metric) err
if response.HAActive {
haActive = 1
}
ch <- c.MakeGaugeMetric("ha_failover_active", haActive)
ch <- c.MakeGaugeMetric("failover_active", haActive)

return nil
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package alert
package ha_check

import (
"strings"
Expand Down Expand Up @@ -49,15 +49,15 @@ func TestHACheckMetrics(t *testing.T) {
assert.NoError(t, err)

expectedMetrics := `
# HELP sap_alert_ha_check High Availability system configuration and status checks
# TYPE sap_alert_ha_check gauge
sap_alert_ha_check{category="HA-STATE",comment="bar",description="foo"} 2
sap_alert_ha_check{category="SAP-STATE",comment="bar2",description="foo2"} 1
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="bar3",description="foo3"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="bar4",description="foo4"} 0
# HELP sap_ha_check_config High Availability system configuration and status checks
# TYPE sap_ha_check_config gauge
sap_ha_check_config{category="HA-STATE",comment="bar",description="foo"} 2
sap_ha_check_config{category="SAP-STATE",comment="bar2",description="foo2"} 1
sap_ha_check_config{category="SAP-CONFIGURATION",comment="bar3",description="foo3"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="bar4",description="foo4"} 0
`

err = testutil.CollectAndCompare(collector, strings.NewReader(expectedMetrics), "sap_alert_ha_check")
err = testutil.CollectAndCompare(collector, strings.NewReader(expectedMetrics), "sap_ha_check_config")
assert.NoError(t, err)
}

Expand All @@ -75,7 +75,7 @@ func TestHACheckMetricsWithEmptyData(t *testing.T) {
collector, err := NewCollector(mockWebService)
assert.NoError(t, err)

err = testutil.CollectAndCompare(collector, strings.NewReader(""), "sap_alert_ha_check")
err = testutil.CollectAndCompare(collector, strings.NewReader(""), "sap_ha_check_config")
assert.NoError(t, err)
}

Expand All @@ -96,11 +96,11 @@ func TestHAFailoverActiveMetric(t *testing.T) {
assert.NoError(t, err)

expectedMetrics := `
# HELP sap_alert_ha_failover_active Whether or not High Availability Failover is active
# TYPE sap_alert_ha_failover_active gauge
sap_alert_ha_failover_active 1
# HELP sap_ha_check_failover_active Whether or not High Availability Failover is active
# TYPE sap_ha_check_failover_active gauge
sap_ha_check_failover_active 1
`
err = testutil.CollectAndCompare(collector, strings.NewReader(expectedMetrics), "sap_alert_ha_failover_active")
err = testutil.CollectAndCompare(collector, strings.NewReader(expectedMetrics), "sap_ha_check_failover_active")
assert.NoError(t, err)
}

Expand All @@ -121,10 +121,10 @@ func TestHAFailoverActiveMetricWithFalseValue(t *testing.T) {
assert.NoError(t, err)

expectedMetrics := `
# HELP sap_alert_ha_failover_active Whether or not High Availability Failover is active
# TYPE sap_alert_ha_failover_active gauge
sap_alert_ha_failover_active 0
# HELP sap_ha_check_failover_active Whether or not High Availability Failover is active
# TYPE sap_ha_check_failover_active gauge
sap_ha_check_failover_active 0
`
err = testutil.CollectAndCompare(collector, strings.NewReader(expectedMetrics), "sap_alert_ha_failover_active")
err = testutil.CollectAndCompare(collector, strings.NewReader(expectedMetrics), "sap_ha_check_failover_active")
assert.NoError(t, err)
}
66 changes: 33 additions & 33 deletions doc/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ These are the currently implemented subsystems.

1. [SAP Start Service](#sap-start-service)
2. [SAP Enqueue Server](#sap-enqueue-server)
3. [Alerts](#alerts)
3. [HA Checks](#ha-checks)

## SAP Start Service

Expand Down Expand Up @@ -476,16 +476,16 @@ sap_dispatcher_queue_reads{type="ICM/Intern"} 37426
```


## Alerts
## HA Checks

A SAP system has multiple internal monitoring mechanisms, and we monitor all of them under the `alerts` metrics subsystem.
SAP systems have several internal monitoring mechanisms, one of which is dedicated to High Availability configuration and runtime status checks.

1. [`sap_alert_ha_check`](#sap_alert_ha_check)
2. [`sap_alert_ha_failover_active`](#sap_alert_ha_check)
1. [`sap_ha_check_config`](#sap_ha_check_config)
2. [`sap_ha_check_failover_active`](#sap_ha_check_failover_active)

### `sap_alert_ha_check`
### `sap_ha_check_config`

This metric represents various High Availability system configuration and status checks.
This metric represents various High Availability system configuration checks.

Each check can be identified by its labels, while the value is an integer status code, as follows.
- `0`: success.
Expand All @@ -501,40 +501,40 @@ Each check can be identified its labels, while the value is an integer status co
#### Example

```
# TYPE sap_alert_ha_check gauge
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="0 Java instances detected",description="Redundant Java instance configuration"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="2 ABAP instances detected",description="Redundant ABAP instance configuration"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="2 ABAP instances with BATCH service detected",description="Redundant ABAP BATCH service configuration"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="2 ABAP instances with DIALOG service detected",description="Redundant ABAP DIALOG service configuration"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="2 ABAP instances with SPOOL service detected",description="Redundant ABAP SPOOL service configuration"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="2 ABAP instances with UPDATE service detected",description="Redundant ABAP UPDATE service configuration"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="ABAP instances on multiple hosts detected",description="ABAP instances on multiple hosts"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="All Enqueue server separated from application server",description="Enqueue separation"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="All MessageServer separated from application server",description="MessageServer separation"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="Enqueue replication enabled",description="Enqueue replication (sapha1as_HA1_00)"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="SAPInstance includes is-ers patch",description="SAPInstance RA sufficient version"} 0
sap_alert_ha_check{category="SAP-CONFIGURATION",comment="SAPInstance includes is-ers patch",description="SAPInstance RA sufficient version (sapha1as_HA1_00)"} 0
sap_alert_ha_check{category="SAP-STATE",comment="2 ABAP instances with active BATCH service detected",description="Redundant ABAP BATCH service state"} 0
sap_alert_ha_check{category="SAP-STATE",comment="2 ABAP instances with active DIALOG service detected",description="Redundant ABAP DIALOG service state"} 0
sap_alert_ha_check{category="SAP-STATE",comment="2 ABAP instances with active SPOOL service detected",description="Redundant ABAP SPOOL service state"} 0
sap_alert_ha_check{category="SAP-STATE",comment="2 ABAP instances with active UPDATE service detected",description="Redundant ABAP UPDATE service state"} 0
sap_alert_ha_check{category="SAP-STATE",comment="ABAP instances with active ABAP BATCH service on multiple hosts detected",description="ABAP instances with ABAP BATCH service on multiple hosts"} 0
sap_alert_ha_check{category="SAP-STATE",comment="ABAP instances with active ABAP DIALOG service on multiple hosts detected",description="ABAP instances with ABAP DIALOG service on multiple hosts"} 0
sap_alert_ha_check{category="SAP-STATE",comment="ABAP instances with active ABAP SPOOL service on multiple hosts detected",description="ABAP instances with ABAP SPOOL service on multiple hosts"} 0
sap_alert_ha_check{category="SAP-STATE",comment="ABAP instances with active ABAP UPDATE service on multiple hosts detected",description="ABAP instances with ABAP UPDATE service on multiple hosts"} 0
sap_alert_ha_check{category="SAP-STATE",comment="Enqueue replication not active",description="Enqueue replication state (sapha1as_HA1_00)"} 2
sap_alert_ha_check{category="SAP-STATE",comment="SCS instance status ok",description="SCS instance running"} 0
# TYPE sap_ha_check_config gauge
sap_ha_check_config{category="SAP-CONFIGURATION",comment="0 Java instances detected",description="Redundant Java instance configuration"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="2 ABAP instances detected",description="Redundant ABAP instance configuration"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="2 ABAP instances with BATCH service detected",description="Redundant ABAP BATCH service configuration"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="2 ABAP instances with DIALOG service detected",description="Redundant ABAP DIALOG service configuration"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="2 ABAP instances with SPOOL service detected",description="Redundant ABAP SPOOL service configuration"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="2 ABAP instances with UPDATE service detected",description="Redundant ABAP UPDATE service configuration"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="ABAP instances on multiple hosts detected",description="ABAP instances on multiple hosts"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="All Enqueue server separated from application server",description="Enqueue separation"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="All MessageServer separated from application server",description="MessageServer separation"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="Enqueue replication enabled",description="Enqueue replication (sapha1as_HA1_00)"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="SAPInstance includes is-ers patch",description="SAPInstance RA sufficient version"} 0
sap_ha_check_config{category="SAP-CONFIGURATION",comment="SAPInstance includes is-ers patch",description="SAPInstance RA sufficient version (sapha1as_HA1_00)"} 0
sap_ha_check_config{category="SAP-STATE",comment="2 ABAP instances with active BATCH service detected",description="Redundant ABAP BATCH service state"} 0
sap_ha_check_config{category="SAP-STATE",comment="2 ABAP instances with active DIALOG service detected",description="Redundant ABAP DIALOG service state"} 0
sap_ha_check_config{category="SAP-STATE",comment="2 ABAP instances with active SPOOL service detected",description="Redundant ABAP SPOOL service state"} 0
sap_ha_check_config{category="SAP-STATE",comment="2 ABAP instances with active UPDATE service detected",description="Redundant ABAP UPDATE service state"} 0
sap_ha_check_config{category="SAP-STATE",comment="ABAP instances with active ABAP BATCH service on multiple hosts detected",description="ABAP instances with ABAP BATCH service on multiple hosts"} 0
sap_ha_check_config{category="SAP-STATE",comment="ABAP instances with active ABAP DIALOG service on multiple hosts detected",description="ABAP instances with ABAP DIALOG service on multiple hosts"} 0
sap_ha_check_config{category="SAP-STATE",comment="ABAP instances with active ABAP SPOOL service on multiple hosts detected",description="ABAP instances with ABAP SPOOL service on multiple hosts"} 0
sap_ha_check_config{category="SAP-STATE",comment="ABAP instances with active ABAP UPDATE service on multiple hosts detected",description="ABAP instances with ABAP UPDATE service on multiple hosts"} 0
sap_ha_check_config{category="SAP-STATE",comment="Enqueue replication not active",description="Enqueue replication state (sapha1as_HA1_00)"} 2
sap_ha_check_config{category="SAP-STATE",comment="SCS instance status ok",description="SCS instance running"} 0
```

### `sap_alert_ha_failover_active`
### `sap_ha_check_failover_active`

Whether or not High Availability Failover is active, 0 being false and 1 being true.

#### Example

```
# TYPE sap_alert_ha_failover_active gauge
sap_alert_ha_failover_active 1
# TYPE sap_ha_check_failover_active gauge
sap_ha_check_failover_active 1
```

## Appendix
Expand Down
9 changes: 5 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
log "github.com/sirupsen/logrus"
flag "github.com/spf13/pflag"

"github.com/SUSE/sap_host_exporter/collector/alert"
"github.com/SUSE/sap_host_exporter/collector/dispatcher"
"github.com/SUSE/sap_host_exporter/collector/enqueue_server"
"github.com/SUSE/sap_host_exporter/collector/start_service"
Expand Down Expand Up @@ -64,13 +63,15 @@ func main() {
log.Info("Dispatcher collector registered")
}

alertCollector, err := alert.NewCollector(webService)
/* disabled due to sapstartsvc upstream issues
HACheckCollector, err := ha_check.NewCollector(webService)
if err != nil {
log.Warn(err)
} else {
prometheus.MustRegister(alertCollector)
log.Info("Alert collector registered")
prometheus.MustRegister(HACheckCollector)
log.Info("Check collector registered")
}
*/

// if we're not in debug log level, we unregister the Go runtime metrics collector that gets registered by default
if !log.IsLevelEnabled(log.DebugLevel) {
Expand Down