diff --git a/cmd/collectors/commonutils.go b/cmd/collectors/commonutils.go
index 71413e03e..847cc7722 100644
--- a/cmd/collectors/commonutils.go
+++ b/cmd/collectors/commonutils.go
@@ -2,14 +2,19 @@ package collectors
import (
"github.com/netapp/harvest/v2/cmd/tools/rest"
+ "github.com/netapp/harvest/v2/pkg/errs"
"github.com/netapp/harvest/v2/pkg/logging"
"github.com/netapp/harvest/v2/pkg/matrix"
+ "github.com/netapp/harvest/v2/pkg/tree/node"
"github.com/tidwall/gjson"
"strings"
"time"
)
-const DefaultBatchSize = "500"
+const (
+ DefaultBatchSize = "500"
+ MaxAllowedTimeDrift = 10 * time.Second
+)
func InvokeRestCall(client *rest.Client, href string, logger *logging.Logger) ([]gjson.Result, error) {
result, err := rest.Fetch(client, href)
@@ -25,6 +30,70 @@ func InvokeRestCall(client *rest.Client, href string, logger *logging.Logger) ([
return result, nil
}
+// GetClusterTime fetches the "date" field from the private CLI cluster date
+// endpoint and returns the time parsed from the last record that carried a
+// valid RFC 3339 date.
+//
+// Records whose date cannot be parsed are logged and skipped. A warning is
+// logged when the spread between the earliest and latest parsed dates is at
+// least MaxAllowedTimeDrift.
+//
+// An error is returned when the request fails, when no records come back, or
+// when none of the records contains a parsable date; in every error case the
+// returned time is the zero value.
+func GetClusterTime(client *rest.Client, returnTimeOut string, logger *logging.Logger) (time.Time, error) {
+	var (
+		clusterTime      time.Time
+		minNano, maxNano int64
+		parsed           int
+	)
+
+	query := "private/cli/cluster/date"
+	fields := []string{"date"}
+
+	href := rest.BuildHref(query, strings.Join(fields, ","), nil, "", "", "1", returnTimeOut, "")
+
+	records, err := rest.Fetch(client, href)
+	if err != nil {
+		return time.Time{}, err
+	}
+	if len(records) == 0 {
+		return time.Time{}, errs.New(errs.ErrConfig, "date not found on cluster")
+	}
+
+	for _, instanceData := range records {
+		currentClusterDate := instanceData.Get("date")
+		if !currentClusterDate.Exists() {
+			continue
+		}
+		t, parseErr := time.Parse(time.RFC3339, currentClusterDate.String())
+		if parseErr != nil {
+			logger.Error().Str("date", currentClusterDate.String()).Err(parseErr).Msg("Failed to load cluster date")
+			continue
+		}
+		clusterTime = t
+
+		// Track the extremes so the drift check covers every pair of records,
+		// not only the distance from the first one.
+		nano := t.UnixNano()
+		if parsed == 0 || nano < minNano {
+			minNano = nano
+		}
+		if parsed == 0 || nano > maxNano {
+			maxNano = nano
+		}
+		parsed++
+	}
+
+	// Without this guard a zero time would be handed back with a nil error.
+	if parsed == 0 {
+		return time.Time{}, errs.New(errs.ErrConfig, "no valid date found on cluster")
+	}
+
+	if timeDrift := time.Duration(maxNano - minNano); timeDrift >= MaxAllowedTimeDrift {
+		logger.Warn().Float64("timedrift(in sec)", timeDrift.Seconds()).Msg("Time drift exist among the nodes")
+	}
+
+	logger.Debug().Str("cluster time", clusterTime.String()).Msg("")
+	return clusterTime, nil
+}
+
+// GetDataInterval fetch pollData interval
+func GetDataInterval(param *node.Node, defaultInterval time.Duration) (time.Duration, error) {
+ var dataIntervalStr string
+ var durationVal time.Duration
+ var err error
+ schedule := param.GetChildS("schedule")
+ if schedule != nil {
+ dataInterval := schedule.GetChildS("data")
+ if dataInterval != nil {
+ dataIntervalStr = dataInterval.GetContentS()
+ if durationVal, err = time.ParseDuration(dataIntervalStr); err == nil {
+ return durationVal, nil
+ }
+ return defaultInterval, err
+ }
+ }
+ return defaultInterval, nil
+}
+
func UpdateProtectedFields(instance *matrix.Instance) {
// check for group_type
diff --git a/cmd/collectors/commonutils_test.go b/cmd/collectors/commonutils_test.go
index 03a5034eb..db9421049 100644
--- a/cmd/collectors/commonutils_test.go
+++ b/cmd/collectors/commonutils_test.go
@@ -2,6 +2,7 @@ package collectors
import (
"github.com/netapp/harvest/v2/pkg/matrix"
+ "github.com/netapp/harvest/v2/pkg/tree/node"
"testing"
"time"
)
@@ -256,3 +257,43 @@ func testNewerTimestampThanDuration(t *testing.T) {
t.Errorf("timestamp= %f is newer than duration %s", timestamp, duration.String())
}
}
+
+func TestGetDataInterval(t *testing.T) {
+ defaultDataPollDuration := 3 * time.Minute
+ type args struct {
+ param *node.Node
+ defaultInterval time.Duration
+ }
+
+ type test struct {
+ name string
+ args args
+ want float64
+ wantErr bool
+ }
+ tests := []test{
+ {"success_return_poller_schedule", args{param: generateScheduleParam("4m"), defaultInterval: defaultDataPollDuration}, 240, false},
+ {"error_return_default_schedule", args{param: generateScheduleParam("4ma"), defaultInterval: defaultDataPollDuration}, 180, true},
+ {"return_default_schedule", args{param: generateScheduleParam(""), defaultInterval: defaultDataPollDuration}, 180, true},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got, err := GetDataInterval(tt.args.param, tt.args.defaultInterval)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("GetDataInterval() error = %v, wantErr %v", err, tt.wantErr)
+ return
+ }
+ if got.Seconds() != tt.want {
+ t.Errorf("GetDataInterval() got = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+// generateScheduleParam builds a "root" node holding a "schedule" child whose
+// "data" child carries the given duration string.
+func generateScheduleParam(duration string) *node.Node {
+	params := node.NewS("root")
+	schedule := params.NewChildS("schedule", "")
+	schedule.NewChildS("data", duration)
+	return params
+}
diff --git a/cmd/collectors/ems/ems.go b/cmd/collectors/ems/ems.go
index 8d4d6762a..e3dc3edac 100644
--- a/cmd/collectors/ems/ems.go
+++ b/cmd/collectors/ems/ems.go
@@ -24,7 +24,6 @@ const defaultSeverityFilter = "alert|emergency|error|informational|notice"
const MaxBookendInstances = 1000
const DefaultBookendResolutionDuration = 28 * 24 * time.Hour // 28 days == 672 hours
const Hyphen = "-"
-const MaxAllowedTimeDrift = 10 * time.Second
type Ems struct {
*rest2.Rest // provides: AbstractCollector, Client, Object, Query, TemplateFn, TemplateType
@@ -248,58 +247,13 @@ func (e *Ems) InitCache() error {
return nil
}
-func (e *Ems) getClusterTime() (time.Time, error) {
- var (
- err error
- records []gjson.Result
- clusterTime time.Time
- timeOfNodes []int64
- )
-
- query := "private/cli/cluster/date"
- fields := []string{"date"}
-
- href := rest.BuildHref(query, strings.Join(fields, ","), nil, "", "", "1", e.ReturnTimeOut, "")
-
- if records, err = e.GetRestData(href); err != nil {
- return clusterTime, err
- }
- if len(records) == 0 {
- return clusterTime, errs.New(errs.ErrConfig, e.Object+" date not found on cluster")
- }
-
- for _, instanceData := range records {
- currentClusterDate := instanceData.Get("date")
- if currentClusterDate.Exists() {
- t, err := time.Parse(time.RFC3339, currentClusterDate.String())
- if err != nil {
- e.Logger.Error().Str("date", currentClusterDate.String()).Err(err).Msg("Failed to load cluster date")
- continue
- }
- clusterTime = t
- timeOfNodes = append(timeOfNodes, t.UnixNano())
- }
- }
-
- for _, timeOfEachNode := range timeOfNodes {
- timeDrift := time.Duration(timeOfEachNode - timeOfNodes[0]).Abs()
- if timeDrift >= MaxAllowedTimeDrift {
- e.Logger.Warn().Float64("timedrift(in sec)", timeDrift.Seconds()).Msg("Time drift exist among the nodes")
- break
- }
- }
-
- e.Logger.Debug().Str("cluster time", clusterTime.String()).Msg("")
- return clusterTime, nil
-}
-
// returns time filter (clustertime - polldata duration)
func (e *Ems) getTimeStampFilter(clusterTime time.Time) string {
fromTime := e.lastFilterTime
// check if this is the first request
if e.lastFilterTime == 0 {
// if first request fetch cluster time
- dataDuration, err := GetDataInterval(e.GetParams(), defaultDataPollDuration)
+ dataDuration, err := collectors.GetDataInterval(e.GetParams(), defaultDataPollDuration)
if err != nil {
e.Logger.Warn().Err(err).
Str("defaultDataPollDuration", defaultDataPollDuration.String()).
@@ -421,7 +375,7 @@ func (e *Ems) PollData() (map[string]*matrix.Matrix, error) {
startTime = time.Now()
// add time filter
- clusterTime, err := e.getClusterTime()
+ clusterTime, err := collectors.GetClusterTime(e.Client, e.ReturnTimeOut, e.Logger)
if err != nil {
return nil, err
}
@@ -507,25 +461,6 @@ func (e *Ems) getHref(names []string, filter []string) string {
return href
}
-// GetDataInterval fetch pollData interval
-func GetDataInterval(param *node.Node, defaultInterval time.Duration) (time.Duration, error) {
- var dataIntervalStr string
- var durationVal time.Duration
- var err error
- schedule := param.GetChildS("schedule")
- if schedule != nil {
- dataInterval := schedule.GetChildS("data")
- if dataInterval != nil {
- dataIntervalStr = dataInterval.GetContentS()
- if durationVal, err = time.ParseDuration(dataIntervalStr); err == nil {
- return durationVal, nil
- }
- return defaultInterval, err
- }
- }
- return defaultInterval, nil
-}
-
func parseProperties(instanceData gjson.Result, property string) gjson.Result {
if !strings.HasPrefix(property, "parameters.") {
diff --git a/cmd/collectors/rest/plugins/health/health.go b/cmd/collectors/rest/plugins/health/health.go
new file mode 100644
index 000000000..a8f5d098d
--- /dev/null
+++ b/cmd/collectors/rest/plugins/health/health.go
@@ -0,0 +1,717 @@
+package health
+
+import (
+ "fmt"
+ goversion "github.com/hashicorp/go-version"
+ "github.com/netapp/harvest/v2/cmd/collectors"
+ "github.com/netapp/harvest/v2/cmd/poller/plugin"
+ "github.com/netapp/harvest/v2/cmd/tools/rest"
+ "github.com/netapp/harvest/v2/pkg/conf"
+ "github.com/netapp/harvest/v2/pkg/errs"
+ "github.com/netapp/harvest/v2/pkg/matrix"
+ "github.com/tidwall/gjson"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// AlertSeverity is the value written to the severity label of an alert instance.
+type AlertSeverity string
+
+// Severities assigned to alert instances by the collect* methods.
+const (
+	errr    AlertSeverity = "error"
+	warning AlertSeverity = "warning"
+)
+
+// Names of the matrices created by the plugin, one per alert category.
+const (
+	diskHealthMatrix                = "health_disk"
+	shelfHealthMatrix               = "health_shelf"
+	supportHealthMatrix             = "health_support"
+	nodeHealthMatrix                = "health_node"
+	networkEthernetPortHealthMatrix = "health_network_ethernet_port"
+	networkFCPortHealthMatrix       = "health_network_fc_port"
+	networkInterfaceHealthMatrix    = "health_network_interface"
+	volumeRansomwareHealthMatrix    = "health_volume_ransomware"
+	volumeMoveHealthMatrix          = "health_volume_move"
+	licenseHealthMatrix             = "health_license"
+)
+
+const (
+	// severityLabel is the label key under which the alert severity is stored.
+	severityLabel = "severity"
+	// defaultDataPollDuration is the look-back window passed to
+	// collectors.GetDataInterval as the fallback interval.
+	defaultDataPollDuration = 3 * time.Minute
+)
+
+// Health is a REST collector plugin that publishes one "alerts" matrix per
+// health category (disk, shelf, support, node, network, volume, license).
+type Health struct {
+	*plugin.AbstractPlugin
+	// client is the REST client created in Init and used for every query.
+	client *rest.Client
+	// data maps a matrix name (the *HealthMatrix constants) to its matrix.
+	data map[string]*matrix.Matrix
+	// lastFilterTime is the cluster time, in Unix seconds, recorded after the
+	// last successful support-alert collection; zero until then.
+	lastFilterTime int64
+}
+
+// New wraps the given abstract plugin in a Health plugin. The REST client and
+// matrices are not created here; see Init.
+func New(p *plugin.AbstractPlugin) plugin.Plugin {
+	health := &Health{AbstractPlugin: p}
+	return health
+}
+
+// metrics lists the metric keys created on every health matrix by initMatrix.
+var metrics = []string{
+	"alerts",
+}
+
+// Init initializes the abstract plugin, creates all health matrices, and
+// builds and initializes the REST client used by the collect* methods.
+// The first failing step aborts initialization and its error is returned.
+func (h *Health) Init() error {
+	if err := h.InitAbc(); err != nil {
+		return err
+	}
+	if err := h.initAllMatrix(); err != nil {
+		return err
+	}
+
+	// The parse error is intentionally discarded, as in the original code;
+	// NOTE(review): this presumably relies on rest.DefaultTimeout being a valid duration - confirm.
+	timeout, _ := time.ParseDuration(rest.DefaultTimeout)
+
+	var err error
+	h.client, err = rest.New(conf.ZapiPoller(h.ParentParams), timeout, h.Auth)
+	if err != nil {
+		return err
+	}
+
+	return h.client.Init(5)
+}
+
+// initAllMatrix replaces h.data with a fresh map and creates one matrix per
+// health category. It stops at, and returns, the first initMatrix error.
+func (h *Health) initAllMatrix() error {
+	names := [...]string{
+		diskHealthMatrix,
+		shelfHealthMatrix,
+		supportHealthMatrix,
+		nodeHealthMatrix,
+		networkEthernetPortHealthMatrix,
+		networkFCPortHealthMatrix,
+		networkInterfaceHealthMatrix,
+		volumeRansomwareHealthMatrix,
+		volumeMoveHealthMatrix,
+		licenseHealthMatrix,
+	}
+
+	h.data = make(map[string]*matrix.Matrix, len(names))
+	for i := range names {
+		if err := h.initMatrix(names[i]); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// initMatrix creates the matrix called name, applies the default export
+// options to it, registers it in h.data and creates every metric listed in
+// metrics on it. The first metric-creation error is logged and returned.
+func (h *Health) initMatrix(name string) error {
+	mat := matrix.New(h.Parent+name, name, name)
+	// Only the new matrix needs export options; matrices created by earlier
+	// calls received theirs when they were created.
+	mat.SetExportOptions(matrix.DefaultExportOptions())
+	h.data[name] = mat
+
+	for _, k := range metrics {
+		if err := matrix.CreateMetric(k, mat); err != nil {
+			h.Logger.Warn().Err(err).Str("key", k).Msg("error while creating metric")
+			return err
+		}
+	}
+	return nil
+}
+
+// Run rebuilds every health matrix, copies the global labels of the parent
+// object's matrix onto them, runs all collect* methods and returns the
+// resulting matrices.
+//
+// It returns nil, nil without collecting when the parent matrix is missing
+// from dataMap, when the cluster version cannot be parsed, or when the
+// cluster version is lower than 9.6. An error is returned only when the
+// matrices cannot be re-initialized.
+func (h *Health) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, error) {
+	data := dataMap[h.Object]
+	if data == nil {
+		// Without the parent matrix there are no global labels to copy, and
+		// calling GetGlobalLabels on it below would dereference nil.
+		h.Logger.Warn().Str("object", h.Object).Msg("parent matrix not found, skipping health collection")
+		return nil, nil
+	}
+
+	clusterVersion := h.client.Cluster().GetVersion()
+	ontapVersion, err := goversion.NewVersion(clusterVersion)
+	if err != nil {
+		h.Logger.Error().Err(err).
+			Str("version", clusterVersion).
+			Msg("Failed to parse version")
+		return nil, nil
+	}
+	version96 := "9.6"
+	version96After, err := goversion.NewVersion(version96)
+	if err != nil {
+		h.Logger.Error().Err(err).
+			Str("version", version96).
+			Msg("Failed to parse version")
+		return nil, nil
+	}
+
+	if ontapVersion.LessThan(version96After) {
+		return nil, nil
+	}
+
+	// Purge and reset data: matrices are rebuilt on every run so alert
+	// instances from the previous run are not carried over.
+	err = h.initAllMatrix()
+	if err != nil {
+		h.Logger.Warn().Err(err).Msg("error while init matrix")
+		return nil, err
+	}
+	for k := range h.data {
+		// Set all global labels if already not exist
+		h.data[k].SetGlobalLabels(data.GetGlobalLabels())
+	}
+
+	h.collectDiskAlerts()
+	h.collectShelfAlerts()
+	h.collectSupportAlerts()
+	h.collectNodeAlerts()
+	h.collectNetworkEthernetPortAlerts()
+	h.collectNetworkFCPortAlerts()
+	h.collectNetworkInterfacesAlerts()
+	h.collectVolumeRansomwareAlerts()
+	h.collectVolumeMoveAlerts()
+	h.collectLicenseAlerts()
+
+	result := make([]*matrix.Matrix, 0, len(h.data))
+
+	for _, value := range h.data {
+		result = append(result, value)
+	}
+	return result, nil
+}
+
+func (h *Health) collectLicenseAlerts() {
+ var (
+ instance *matrix.Instance
+ )
+
+ records, err := h.getNonCompliantLicense()
+ if err != nil {
+ if errs.IsRestErr(err, errs.APINotFound) {
+ h.Logger.Debug().Err(err).Msg("API not found")
+ } else {
+ h.Logger.Error().Err(err).Msg("Failed to collect analytic data")
+ }
+ return
+ }
+ mat := h.data[licenseHealthMatrix]
+ for _, record := range records {
+ name := record.Get("name").String()
+ scope := record.Get("scope").String()
+ state := record.Get("state").String()
+ instance, err = mat.NewInstance(name)
+ if err != nil {
+ h.Logger.Warn().Str("key", name).Msg("error while creating instance")
+ continue
+ }
+ instance.SetLabel("name", name)
+ instance.SetLabel("scope", scope)
+ instance.SetLabel("state", state)
+ instance.SetLabel(severityLabel, string(errr))
+
+ h.setAlertMetric(mat, instance)
+ }
+}
+
+func (h *Health) collectVolumeMoveAlerts() {
+ var (
+ instance *matrix.Instance
+ )
+
+ records, err := h.getMoveFailedVolumes()
+ if err != nil {
+ if errs.IsRestErr(err, errs.APINotFound) {
+ h.Logger.Debug().Err(err).Msg("API not found")
+ } else {
+ h.Logger.Error().Err(err).Msg("Failed to collect analytic data")
+ }
+ return
+ }
+ mat := h.data[volumeMoveHealthMatrix]
+ for _, record := range records {
+ uuid := record.Get("uuid").String()
+ volume := record.Get("name").String()
+ svm := record.Get("svm.name").String()
+ movementState := record.Get("movement.state").String()
+ instance, err = mat.NewInstance(uuid)
+ if err != nil {
+ h.Logger.Warn().Str("key", uuid).Msg("error while creating instance")
+ continue
+ }
+ instance.SetLabel("movement_state", movementState)
+ instance.SetLabel("svm", svm)
+ instance.SetLabel("volume", volume)
+ instance.SetLabel(severityLabel, string(warning))
+
+ h.setAlertMetric(mat, instance)
+ }
+}
+
+func (h *Health) collectVolumeRansomwareAlerts() {
+ var (
+ instance *matrix.Instance
+ )
+ clusterVersion := h.client.Cluster().GetVersion()
+ ontapVersion, err := goversion.NewVersion(clusterVersion)
+ if err != nil {
+ h.Logger.Error().Err(err).
+ Str("version", clusterVersion).
+ Msg("Failed to parse version")
+ return
+ }
+ version910 := "9.10"
+ version910After, err := goversion.NewVersion(version910)
+ if err != nil {
+ h.Logger.Error().Err(err).
+ Str("version", version910).
+ Msg("Failed to parse version")
+ return
+ }
+
+ if ontapVersion.LessThan(version910After) {
+ return
+ }
+ records, err := h.getRansomwareVolumes()
+ if err != nil {
+ if errs.IsRestErr(err, errs.APINotFound) {
+ h.Logger.Debug().Err(err).Msg("API not found")
+ } else {
+ h.Logger.Error().Err(err).Msg("Failed to collect analytic data")
+ }
+ return
+ }
+ mat := h.data[volumeRansomwareHealthMatrix]
+ for _, record := range records {
+ uuid := record.Get("uuid").String()
+ volume := record.Get("name").String()
+ antiRansomwareAttackProbability := record.Get("anti_ransomware.attack_probability").String()
+ instance, err = mat.NewInstance(uuid)
+ if err != nil {
+ h.Logger.Warn().Str("key", uuid).Msg("error while creating instance")
+ continue
+ }
+ instance.SetLabel("anti_ransomware_attack_probability", antiRansomwareAttackProbability)
+
+ instance.SetLabel("volume", volume)
+ instance.SetLabel(severityLabel, string(errr))
+
+ h.setAlertMetric(mat, instance)
+ }
+}
+
+func (h *Health) collectNetworkInterfacesAlerts() {
+ var (
+ instance *matrix.Instance
+ )
+ records, err := h.getNonHomeLIFs()
+ if err != nil {
+ if errs.IsRestErr(err, errs.APINotFound) {
+ h.Logger.Debug().Err(err).Msg("API not found")
+ } else {
+ h.Logger.Error().Err(err).Msg("Failed to collect analytic data")
+ }
+ return
+ }
+ mat := h.data[networkInterfaceHealthMatrix]
+ for _, record := range records {
+ uuid := record.Get("uuid").String()
+ lif := record.Get("name").String()
+ isHome := record.Get("location.is_home").String()
+ instance, err = mat.NewInstance(uuid)
+ if err != nil {
+ h.Logger.Warn().Str("key", uuid).Msg("error while creating instance")
+ continue
+ }
+ instance.SetLabel("isHome", isHome)
+ instance.SetLabel("lif", lif)
+ instance.SetLabel(severityLabel, string(warning))
+
+ h.setAlertMetric(mat, instance)
+ }
+}
+
+func (h *Health) collectNetworkFCPortAlerts() {
+ var (
+ instance *matrix.Instance
+ )
+ records, err := h.getFCPorts()
+ if err != nil {
+ if errs.IsRestErr(err, errs.APINotFound) {
+ h.Logger.Debug().Err(err).Msg("API not found")
+ } else {
+ h.Logger.Error().Err(err).Msg("Failed to collect analytic data")
+ }
+ return
+ }
+ mat := h.data[networkFCPortHealthMatrix]
+ for _, record := range records {
+ uuid := record.Get("uuid").String()
+ nodeName := record.Get("node.name").String()
+ port := record.Get("name").String()
+ state := record.Get("state").String()
+ instance, err = mat.NewInstance(uuid)
+ if err != nil {
+ h.Logger.Warn().Str("key", uuid).Msg("error while creating instance")
+ continue
+ }
+ instance.SetLabel("node", nodeName)
+ instance.SetLabel("state", state)
+ instance.SetLabel("port", port)
+ instance.SetLabel(severityLabel, string(errr))
+
+ h.setAlertMetric(mat, instance)
+ }
+}
+
+func (h *Health) collectNetworkEthernetPortAlerts() {
+ var (
+ instance *matrix.Instance
+ )
+ records, err := h.getEthernetPorts()
+ if err != nil {
+ if errs.IsRestErr(err, errs.APINotFound) {
+ h.Logger.Debug().Err(err).Msg("API not found")
+ } else {
+ h.Logger.Error().Err(err).Msg("Failed to collect analytic data")
+ }
+ return
+ }
+ mat := h.data[networkEthernetPortHealthMatrix]
+ for _, record := range records {
+ uuid := record.Get("uuid").String()
+ port := record.Get("name").String()
+ nodeName := record.Get("node.name").String()
+ portType := record.Get("type").String()
+ state := record.Get("state").String()
+ instance, err = mat.NewInstance(uuid)
+ if err != nil {
+ h.Logger.Warn().Str("key", uuid).Msg("error while creating instance")
+ continue
+ }
+ instance.SetLabel("node", nodeName)
+ instance.SetLabel("state", state)
+ instance.SetLabel("port", port)
+ instance.SetLabel("type", portType)
+ instance.SetLabel(severityLabel, string(errr))
+
+ h.setAlertMetric(mat, instance)
+ }
+}
+
+func (h *Health) collectNodeAlerts() {
+ var (
+ instance *matrix.Instance
+ )
+ records, err := h.getNodes()
+ if err != nil {
+ if errs.IsRestErr(err, errs.APINotFound) {
+ h.Logger.Debug().Err(err).Msg("API not found")
+ } else {
+ h.Logger.Error().Err(err).Msg("Failed to collect analytic data")
+ }
+ return
+ }
+ mat := h.data[nodeHealthMatrix]
+ for _, record := range records {
+ nodeName := record.Get("node").String()
+
+ instance, err = mat.NewInstance(nodeName)
+ if err != nil {
+ h.Logger.Warn().Str("key", nodeName).Msg("error while creating instance")
+ continue
+ }
+ instance.SetLabel("node", nodeName)
+ instance.SetLabel("healthy", "false")
+ instance.SetLabel(severityLabel, string(errr))
+
+ h.setAlertMetric(mat, instance)
+ }
+}
+
+// collectShelfAlerts adds one alert instance, keyed by shelf, for every
+// record returned by getShelves whose error_severity is "warning", "error" or
+// "critical". "error" and "critical" map to the error severity, "warning" to
+// the warning severity; records with any other severity are ignored.
+// Fetch failures are logged (at debug level when the API is not found) and
+// end the collection; instance-creation failures are logged and skipped.
+func (h *Health) collectShelfAlerts() {
+	records, err := h.getShelves()
+	if err != nil {
+		if errs.IsRestErr(err, errs.APINotFound) {
+			h.Logger.Debug().Err(err).Msg("API not found")
+		} else {
+			h.Logger.Error().Err(err).Msg("Failed to collect shelf data")
+		}
+		return
+	}
+
+	mat := h.data[shelfHealthMatrix]
+	for _, record := range records {
+		shelf := record.Get("shelf").String()
+		errorType := record.Get("error_type").String()
+		errorSeverity := record.Get("error_severity").String()
+		errorText := record.Get("error_text").String()
+
+		// errorSeverity possible values are unknown|notice|warning|error|critical
+		var severity AlertSeverity
+		switch errorSeverity {
+		case "error", "critical":
+			severity = errr
+		case "warning":
+			severity = warning
+		default:
+			continue
+		}
+
+		instance, err := mat.NewInstance(shelf)
+		if err != nil {
+			// Include the cause so the failure can be diagnosed from the log.
+			h.Logger.Warn().Err(err).Str("key", shelf).Msg("error while creating instance")
+			continue
+		}
+		instance.SetLabel("shelf", shelf)
+		instance.SetLabel("error_type", errorType)
+		instance.SetLabel("error_text", errorText)
+		instance.SetLabel(severityLabel, string(severity))
+
+		h.setAlertMetric(mat, instance)
+	}
+}
+
+func (h *Health) collectSupportAlerts() {
+ var (
+ instance *matrix.Instance
+ )
+ clusterTime, err := collectors.GetClusterTime(h.client, "", h.Logger)
+ if err != nil {
+ h.Logger.Error().Err(err).Msg("Failed to collect cluster time")
+ return
+ }
+ toTime := clusterTime.Unix()
+ timeFilter := h.getTimeStampFilter(clusterTime)
+ addFilter := []string{"suppress=false"}
+ filter := append(addFilter, timeFilter)
+
+ records, err := h.getSupportAlerts(filter)
+ if err != nil {
+ if errs.IsRestErr(err, errs.APINotFound) {
+ h.Logger.Debug().Err(err).Msg("API not found")
+ } else {
+ h.Logger.Error().Err(err).Msg("Failed to collect analytic data")
+ }
+ return
+ }
+ mat := h.data[supportHealthMatrix]
+ for index, record := range records {
+ nodeName := record.Get("node.name").String()
+ monitor := record.Get("monitor").String()
+ name := record.Get("name").String()
+ resource := record.Get("resource").String()
+ reason := record.Get("cause.message").String()
+ correctiveAction := record.Get("corrective_action.message").String()
+ instance, err = mat.NewInstance(strconv.Itoa(index))
+ if err != nil {
+ h.Logger.Warn().Int("key", index).Msg("error while creating instance")
+ continue
+ }
+ instance.SetLabel("node", nodeName)
+ instance.SetLabel("monitor", monitor)
+ instance.SetLabel("name", name)
+ instance.SetLabel("resource", resource)
+ instance.SetLabel("reason", reason)
+ instance.SetLabel("correctiveAction", correctiveAction)
+ instance.SetLabel(severityLabel, string(warning))
+
+ h.setAlertMetric(mat, instance)
+ }
+ // update lastFilterTime to current cluster time
+ h.lastFilterTime = toTime
+}
+
+func (h *Health) collectDiskAlerts() {
+ var (
+ instance *matrix.Instance
+ )
+ records, err := h.getDisks()
+ if err != nil {
+ if errs.IsRestErr(err, errs.APINotFound) {
+ h.Logger.Debug().Err(err).Msg("API not found")
+ } else {
+ h.Logger.Error().Err(err).Msg("Failed to collect analytic data")
+ }
+ return
+ }
+ mat := h.data[diskHealthMatrix]
+ for _, record := range records {
+ name := record.Get("name").String()
+ containerType := record.Get("container_type").String()
+ instance, err = mat.NewInstance(name)
+ if err != nil {
+ h.Logger.Warn().Str("key", name).Msg("error while creating instance")
+ continue
+ }
+ instance.SetLabel("disk", name)
+ instance.SetLabel("container_type", containerType)
+ if containerType == "broken" {
+ instance.SetLabel(severityLabel, string(errr))
+ } else if containerType == "unassigned" {
+ instance.SetLabel(severityLabel, string(warning))
+ }
+
+ h.setAlertMetric(mat, instance)
+ }
+}
+
+// getDisks queries api/storage/disks for the name and container_type of disks
+// whose container_type is broken or unassigned. On failure it returns nil and
+// the error from collectors.InvokeRestCall.
+func (h *Health) getDisks() ([]gjson.Result, error) {
+	const query = "api/storage/disks"
+	fieldList := strings.Join([]string{"name", "container_type"}, ",")
+	filters := []string{"container_type=broken|unassigned"}
+
+	href := rest.BuildHref(query, fieldList, filters, "", "", "", "", query)
+	records, err := collectors.InvokeRestCall(h.client, href, h.Logger)
+	if err != nil {
+		return nil, err
+	}
+	return records, nil
+}
+
+// getShelves queries api/private/cli/storage/shelf for the error_type,
+// error_severity and error_text fields, without any filter. On failure it
+// returns nil and the error from collectors.InvokeRestCall.
+func (h *Health) getShelves() ([]gjson.Result, error) {
+	const query = "api/private/cli/storage/shelf"
+	fieldList := strings.Join([]string{"error_type", "error_severity", "error_text"}, ",")
+
+	href := rest.BuildHref(query, fieldList, nil, "", "", "", "", query)
+	records, err := collectors.InvokeRestCall(h.client, href, h.Logger)
+	if err != nil {
+		return nil, err
+	}
+	return records, nil
+}
+
+// getNodes queries api/private/cli/node for the health field of nodes whose
+// health is false. On failure it returns nil and the error from
+// collectors.InvokeRestCall.
+func (h *Health) getNodes() ([]gjson.Result, error) {
+	const query = "api/private/cli/node"
+	fieldList := strings.Join([]string{"health"}, ",")
+	filters := []string{"health=false"}
+
+	href := rest.BuildHref(query, fieldList, filters, "", "", "", "", query)
+	records, err := collectors.InvokeRestCall(h.client, href, h.Logger)
+	if err != nil {
+		return nil, err
+	}
+	return records, nil
+}
+
+// getRansomwareVolumes queries api/storage/volumes, with no explicit field
+// list, for volumes whose anti_ransomware.state is enabled and whose
+// anti_ransomware.attack_probability is low, moderate or high. On failure it
+// returns nil and the error from collectors.InvokeRestCall.
+func (h *Health) getRansomwareVolumes() ([]gjson.Result, error) {
+	const query = "api/storage/volumes"
+	filters := []string{
+		"anti_ransomware.state=enabled",
+		"anti_ransomware.attack_probability=low|moderate|high",
+	}
+
+	href := rest.BuildHref(query, "", filters, "", "", "", "", query)
+	records, err := collectors.InvokeRestCall(h.client, href, h.Logger)
+	if err != nil {
+		return nil, err
+	}
+	return records, nil
+}
+
+// getNonCompliantLicense queries api/cluster/licensing/licenses for the name,
+// scope and state of licenses whose state is noncompliant. On failure it
+// returns nil and the error from collectors.InvokeRestCall.
+func (h *Health) getNonCompliantLicense() ([]gjson.Result, error) {
+	const query = "api/cluster/licensing/licenses"
+	fieldList := strings.Join([]string{"name", "scope", "state"}, ",")
+	filters := []string{"state=noncompliant"}
+
+	href := rest.BuildHref(query, fieldList, filters, "", "", "", "", query)
+	records, err := collectors.InvokeRestCall(h.client, href, h.Logger)
+	if err != nil {
+		return nil, err
+	}
+	return records, nil
+}
+
+// getMoveFailedVolumes queries api/storage/volumes for the uuid, name,
+// movement.state and svm of volumes whose movement.state is cutover_wait,
+// failed or cutover_pending. On failure it returns nil and the error from
+// collectors.InvokeRestCall.
+func (h *Health) getMoveFailedVolumes() ([]gjson.Result, error) {
+	const query = "api/storage/volumes"
+	fieldList := strings.Join([]string{"uuid", "name", "movement.state", "svm"}, ",")
+	filters := []string{"movement.state=cutover_wait|failed|cutover_pending"}
+
+	href := rest.BuildHref(query, fieldList, filters, "", "", "", "", query)
+	records, err := collectors.InvokeRestCall(h.client, href, h.Logger)
+	if err != nil {
+		return nil, err
+	}
+	return records, nil
+}
+
+// getNonHomeLIFs queries api/network/ip/interfaces, with no explicit field
+// list, for interfaces whose location.is_home is false. On failure it returns
+// nil and the error from collectors.InvokeRestCall.
+func (h *Health) getNonHomeLIFs() ([]gjson.Result, error) {
+	const query = "api/network/ip/interfaces"
+	filters := []string{"location.is_home=false"}
+
+	href := rest.BuildHref(query, "", filters, "", "", "", "", query)
+	records, err := collectors.InvokeRestCall(h.client, href, h.Logger)
+	if err != nil {
+		return nil, err
+	}
+	return records, nil
+}
+
+// getFCPorts queries api/network/fc/ports for the name and node of enabled
+// ports whose state is offlined_by_system. On failure it returns nil and the
+// error from collectors.InvokeRestCall.
+func (h *Health) getFCPorts() ([]gjson.Result, error) {
+	const query = "api/network/fc/ports"
+	fieldList := strings.Join([]string{"name", "node"}, ",")
+	filters := []string{"enabled=true", "state=offlined_by_system"}
+
+	href := rest.BuildHref(query, fieldList, filters, "", "", "", "", query)
+	records, err := collectors.InvokeRestCall(h.client, href, h.Logger)
+	if err != nil {
+		return nil, err
+	}
+	return records, nil
+}
+
+// getEthernetPorts queries api/network/ethernet/ports for the name, node and
+// type of enabled ports whose state is down. On failure it returns nil and
+// the error from collectors.InvokeRestCall.
+func (h *Health) getEthernetPorts() ([]gjson.Result, error) {
+	var (
+		result []gjson.Result
+		err    error
+	)
+
+	// "type" is requested explicitly because collectNetworkEthernetPortAlerts
+	// reads it into a label and it is not part of the filter below.
+	// NOTE(review): assumes the endpoint only returns requested/queried fields - confirm against the ONTAP REST docs.
+	fields := []string{"name", "node", "type"}
+	query := "api/network/ethernet/ports"
+	href := rest.BuildHref(query, strings.Join(fields, ","), []string{"enabled=true", "state=down"}, "", "", "", "", query)
+
+	if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil {
+		return nil, err
+	}
+	return result, nil
+}
+
+// getSupportAlerts queries api/private/support/alerts, with no explicit field
+// list, using the caller-supplied filter. On failure it returns nil and the
+// error from collectors.InvokeRestCall.
+func (h *Health) getSupportAlerts(filter []string) ([]gjson.Result, error) {
+	const query = "api/private/support/alerts"
+
+	href := rest.BuildHref(query, "", filter, "", "", "", "", query)
+	records, err := collectors.InvokeRestCall(h.client, href, h.Logger)
+	if err != nil {
+		return nil, err
+	}
+	return records, nil
+}
+
+// getTimeStampFilter builds the "time=>=<unix seconds>" filter for the
+// support-alert query. Once h.lastFilterTime is set it is used as the lower
+// bound; on the first request (lastFilterTime == 0) the bound is clusterTime
+// minus the data interval obtained from collectors.GetDataInterval, which
+// falls back to defaultDataPollDuration.
+func (h *Health) getTimeStampFilter(clusterTime time.Time) string {
+	if h.lastFilterTime != 0 {
+		return fmt.Sprintf("time=>=%d", h.lastFilterTime)
+	}
+
+	// First request: look back one data poll interval from the cluster time.
+	lookBack, err := collectors.GetDataInterval(h.ParentParams, defaultDataPollDuration)
+	if err != nil {
+		h.Logger.Warn().Err(err).
+			Str("defaultDataPollDuration", defaultDataPollDuration.String()).
+			Msg("Failed to parse duration. using default")
+	}
+	since := clusterTime.Add(-lookBack).Unix()
+	return fmt.Sprintf("time=>=%d", since)
+}
+
+func (h *Health) setAlertMetric(mat *matrix.Matrix, instance *matrix.Instance) {
+ var err error
+ m := mat.GetMetric("alerts")
+ if m == nil {
+ if m, err = mat.NewMetricFloat64("alerts"); err != nil {
+ h.Logger.Warn().Err(err).Str("key", "alerts").Msg("error while creating metric")
+ return
+ }
+ }
+ if err = m.SetValueFloat64(instance, 1); err != nil {
+ h.Logger.Error().Err(err).Str("metric", "alerts").Msg("Unable to set value on metric")
+ }
+}
diff --git a/cmd/collectors/rest/rest.go b/cmd/collectors/rest/rest.go
index 056784a3d..efed92b17 100644
--- a/cmd/collectors/rest/rest.go
+++ b/cmd/collectors/rest/rest.go
@@ -4,6 +4,7 @@ import (
"fmt"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/certificate"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/disk"
+ "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/health"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/netroute"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/qospolicyadaptive"
"github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/qospolicyfixed"
@@ -369,6 +370,8 @@ func (r *Rest) LoadPlugin(kind string, abc *plugin.AbstractPlugin) plugin.Plugin
switch kind {
case "Disk":
return disk.New(abc)
+ case "Health":
+ return health.New(abc)
case "NetRoute":
return netroute.New(abc)
case "Qtree":
diff --git a/cmd/tools/grafana/dashboard_test.go b/cmd/tools/grafana/dashboard_test.go
index 315afba12..cb258f8e0 100644
--- a/cmd/tools/grafana/dashboard_test.go
+++ b/cmd/tools/grafana/dashboard_test.go
@@ -311,7 +311,7 @@ func doPanel(t *testing.T, pathPrefix string, key gjson.Result, value gjson.Resu
numExpressions := len(expressions)
for _, e := range expressions {
// Ignore labels and _status
- if strings.HasSuffix(e.metric, "_labels") || strings.HasSuffix(e.metric, "_status") {
+ if strings.HasSuffix(e.metric, "_labels") || strings.HasSuffix(e.metric, "_status") || strings.HasSuffix(e.metric, "_events") || strings.HasSuffix(e.metric, "_alerts") {
continue
}
unit := unitForExpr(e, overrides, defaultUnit, valueToName, numExpressions)
@@ -615,6 +615,7 @@ func TestOnlyHighlightsExpanded(t *testing.T) {
"cmode/fsa.json": 2,
"cmode/workload.json": 2,
"cmode/smb2.json": 2,
+ "cmode/health.json": 2,
}
// count number of expanded sections in dashboard and ensure num expanded = 1
visitDashboards(
diff --git a/conf/rest/9.12.0/node.yaml b/conf/rest/9.12.0/node.yaml
index d77e503ed..397a1434a 100644
--- a/conf/rest/9.12.0/node.yaml
+++ b/conf/rest/9.12.0/node.yaml
@@ -11,6 +11,7 @@ counters:
- ^location
- ^model
- ^serial_number => serial
+ - ^state
- ^version.full => version
- controller.failed_fan.count => failed_fan
- controller.failed_power_supply.count => failed_power
@@ -47,5 +48,6 @@ export_options:
- model
- serial
- uptime
+ - state
- vendor
- version
diff --git a/conf/rest/9.6.0/health.yaml b/conf/rest/9.6.0/health.yaml
new file mode 100644
index 000000000..41c0fe245
--- /dev/null
+++ b/conf/rest/9.6.0/health.yaml
@@ -0,0 +1,12 @@
+name: Health
+query: api/cluster
+object: health
+
+counters:
+ - ^^uuid
+ - ^name
+
+plugins:
+ - Health
+
+export_data: false
diff --git a/conf/rest/default.yaml b/conf/rest/default.yaml
index 95ef33a1b..bedb4e2fa 100644
--- a/conf/rest/default.yaml
+++ b/conf/rest/default.yaml
@@ -13,6 +13,7 @@ objects:
ClusterPeer: clusterpeer.yaml
Disk: disk.yaml
# ExportRule: exports.yaml
+ Health: health.yaml
Lun: lun.yaml
Namespace: namespace.yaml
NetConnections: netconnections.yaml
diff --git a/grafana/dashboards/cmode/health.json b/grafana/dashboards/cmode/health.json
new file mode 100644
index 000000000..6f9f3dbbd
--- /dev/null
+++ b/grafana/dashboards/cmode/health.json
@@ -0,0 +1,3219 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "8.1.8"
+ },
+ {
+ "type": "panel",
+ "id": "piechart",
+ "name": "Pie chart",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "stat",
+ "name": "Stat",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "table",
+ "name": "Table",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "text",
+ "name": "Text",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "",
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "id": null,
+ "iteration": 1681133194625,
+ "links": [
+ {
+ "asDropdown": true,
+ "icon": "external link",
+ "includeVars": true,
+ "keepTime": true,
+ "tags": [
+ "cdot"
+ ],
+ "targetBlank": false,
+ "title": "Related Dashboards",
+ "tooltip": "",
+ "type": "dashboards",
+ "url": ""
+ }
+ ],
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 239,
+ "panels": [],
+ "title": "Important Information about Health Dashboard",
+ "type": "row"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 3,
+ "w": 24,
+ "x": 0,
+ "y": 1
+ },
+ "id": 241,
+ "options": {
+ "content": "This dashboard requires ONTAP 9.6+ and the REST collector. Two actions are required to use this dashboard:\n1. Enable the REST collector in your harvest.yml config\n2. Enable the EMS collector in your harvest.yml config for EMS events\n",
+ "mode": "markdown"
+ },
+ "pluginVersion": "8.1.8",
+ "type": "text"
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 4
+ },
+ "id": 225,
+ "panels": [],
+ "title": "Highlights",
+ "type": "row"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "dark-red",
+ "value": null
+ }
+ ]
+ },
+ "unit": "locale"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Volumes protected"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Volumes not protected"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 0,
+ "y": 5
+ },
+ "id": 277,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {},
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "(count(health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_node_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_network_fc_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_network_ethernet_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_license_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Total Errors",
+ "transformations": [],
+ "type": "stat"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "orange",
+ "value": null
+ }
+ ]
+ },
+ "unit": "locale"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Volumes protected"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Volumes not protected"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 8,
+ "y": 5
+ },
+ "id": 278,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {},
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "(count(health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0))\n+\n(count(health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0))\n+\n(count(last_over_time(health_support_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}[24h]) == 1) or vector(0))\n+\n(count(health_network_interface_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0))\n+\n(count(health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0))\n+\n(count(health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Total Warnings",
+ "transformations": [],
+ "type": "stat"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "$EMSDescription",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "dark-red",
+ "value": null
+ }
+ ]
+ },
+ "unit": "locale"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Volumes not protected"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 8,
+ "x": 16,
+ "y": 5
+ },
+ "id": 270,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {},
+ "textMode": "auto"
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "(count(last_over_time(ems_events{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"emergency\"}[$__range]) == 1) or vector(0))",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Total Active Emergency EMS",
+ "transformations": [],
+ "type": "stat"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "unit": "locale"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 16,
+ "w": 8,
+ "x": 0,
+ "y": 11
+ },
+ "id": 268,
+ "options": {
+ "displayLabels": [
+ "value"
+ ],
+ "legend": {
+ "displayMode": "table",
+ "placement": "bottom",
+ "values": [
+ "value"
+ ]
+ },
+ "pieType": "donut",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "pluginVersion": "8.1.2",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "count(health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Broken Disk",
+ "refId": "A"
+ },
+ {
+ "exemplar": false,
+ "expr": "count(health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Shelf Error",
+ "refId": "B"
+ },
+ {
+ "exemplar": false,
+ "expr": "count(health_node_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Node Down",
+ "refId": "D"
+ },
+ {
+ "exemplar": false,
+ "expr": "(count(health_network_fc_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_network_ethernet_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Network Port Down",
+ "refId": "E"
+ },
+ {
+ "exemplar": false,
+ "expr": "count(health_license_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "License Non Compliant",
+ "refId": "C"
+ }
+ ],
+ "title": "Errors",
+ "transformations": [],
+ "type": "piechart"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ }
+ },
+ "decimals": 0,
+ "mappings": [],
+ "unit": "locale"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Volumes protected"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Volumes not protected"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 16,
+ "w": 8,
+ "x": 8,
+ "y": 11
+ },
+ "id": 269,
+ "options": {
+ "displayLabels": [
+ "value"
+ ],
+ "legend": {
+ "displayMode": "table",
+ "placement": "bottom",
+ "values": [
+ "value"
+ ]
+ },
+ "pieType": "donut",
+ "reduceOptions": {
+ "calcs": [
+ "last"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "tooltip": {
+ "mode": "single"
+ }
+ },
+ "pluginVersion": "8.1.2",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "count(health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0)",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Unassigned Disk",
+ "refId": "A"
+ },
+ {
+ "exemplar": false,
+ "expr": "count(health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Shelf Warning",
+ "refId": "B"
+ },
+ {
+ "exemplar": false,
+ "expr": "(count(last_over_time(health_support_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}[24h]) == 1) or vector(0))",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Health Monitor Alerts (last 24h)",
+ "refId": "C"
+ },
+ {
+ "exemplar": false,
+ "expr": "count(health_network_interface_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Network Interface Not at Home Port",
+ "refId": "D"
+ },
+ {
+ "exemplar": false,
+ "expr": "count(health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Volume Move Alerts",
+ "refId": "E"
+ },
+ {
+ "exemplar": false,
+ "expr": "count(health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "Volume Ransomware (9.10+)",
+ "refId": "F"
+ }
+ ],
+ "title": "Warnings",
+ "transformations": [],
+ "type": "piechart"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "$EMSDescription",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "displayMode": "auto"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "locale"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Volumes protected"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "green",
+ "mode": "fixed"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Volumes not protected"
+ },
+ "properties": [
+ {
+ "id": "color",
+ "value": {
+ "fixedColor": "yellow",
+ "mode": "fixed"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 16,
+ "w": 8,
+ "x": 16,
+ "y": 11
+ },
+ "id": 272,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "count(last_over_time(ems_events{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"emergency\"}[$__range]) == 1) by (message)",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Active Emergency EMS",
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true
+ },
+ "indexByName": {},
+ "renameByName": {
+ "Value": "Count",
+ "message": "EMS"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "collapsed": true,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 27
+ },
+ "id": 251,
+ "panels": [
+ {
+ "datasource": null,
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "displayMode": "auto"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Healthy"
+ },
+ "properties": [
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "false": {
+ "index": 0,
+ "text": "No"
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 24,
+ "x": 0,
+ "y": 19
+ },
+ "id": 253,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "node_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(cluster,node,datacenter) group_left(severity) health_node_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Node Issues",
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "instance": true,
+ "job": true
+ },
+ "indexByName": {
+ "Time": 0,
+ "Value": 10,
+ "cluster": 1,
+ "datacenter": 2,
+ "healthy": 4,
+ "instance": 8,
+ "job": 9,
+ "node": 3,
+ "severity": 6,
+ "state": 5,
+ "version": 7
+ },
+ "renameByName": {
+ "cluster": "Cluster",
+ "datacenter": "Datacenter",
+ "healthy": "Healthy",
+ "node": "Node",
+ "severity": "Severity",
+ "state": "State",
+ "version": "Version"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ }
+ ],
+ "title": "Node",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 28
+ },
+ "id": 230,
+ "panels": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "displayMode": "auto",
+ "filterable": true
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Datacenter"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "string"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Shelf"
+ },
+ "properties": [
+ {
+ "id": "custom.width",
+ "value": null
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 24,
+ "x": 0,
+ "y": 29
+ },
+ "id": 248,
+ "options": {
+ "showHeader": true,
+ "sortBy": []
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "disk_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(disk,cluster,datacenter) group_left(severity) health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Disk Issues",
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "failed": true,
+ "index": true,
+ "instance": true,
+ "job": true,
+ "node": true,
+ "owner_node": true
+ },
+ "indexByName": {
+ "Time": 0,
+ "Value": 11,
+ "cluster": 2,
+ "container_type": 5,
+ "datacenter": 1,
+ "disk": 3,
+ "instance": 9,
+ "job": 10,
+ "model": 8,
+ "serial_number": 7,
+ "severity": 4,
+ "shelf": 6
+ },
+ "renameByName": {
+ "cluster": "Cluster",
+ "container_type": "Container Type",
+ "datacenter": "Datacenter",
+ "disk": "Disk",
+ "model": "Model",
+ "serial_number": "Serial Number",
+ "severity": "Severity",
+ "shelf": "Shelf"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ }
+ ],
+ "title": "Disks",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 29
+ },
+ "id": 245,
+ "panels": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "left",
+ "displayMode": "auto",
+ "filterable": true
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "semi-dark-yellow",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "disk_count"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "locale"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "shelf"
+ },
+ "properties": [
+ {
+ "id": "custom.displayMode",
+ "value": "json-view"
+ },
+ {
+ "id": "custom.width"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "state"
+ },
+ "properties": [
+ {
+ "id": "custom.displayMode",
+ "value": "color-background"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgb(224, 47, 47)",
+ "value": null
+ },
+ {
+ "color": "rgb(118, 204, 49)",
+ "value": 1
+ }
+ ]
+ }
+ },
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "1": {
+ "text": "ONLINE"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "from": 0,
+ "result": {
+ "text": "OFFLINE"
+ },
+ "to": 0.999
+ },
+ "type": "range"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Avg Ambient Temp (C)"
+ },
+ "properties": [
+ {
+ "id": "custom.displayMode",
+ "value": "color-background"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "transparent",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 5
+ },
+ {
+ "color": "#EAB839",
+ "value": 45
+ },
+ {
+ "color": "orange",
+ "value": 65
+ },
+ {
+ "color": "red",
+ "value": 75
+ }
+ ]
+ }
+ },
+ {
+ "id": "custom.align",
+ "value": "right"
+ },
+ {
+ "id": "unit",
+ "value": "celsius"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Max Temp (C)"
+ },
+ "properties": [
+ {
+ "id": "custom.displayMode",
+ "value": "color-background"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "transparent",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 5
+ },
+ {
+ "color": "yellow",
+ "value": 45
+ },
+ {
+ "color": "orange",
+ "value": 65
+ },
+ {
+ "color": "red",
+ "value": 75
+ }
+ ]
+ }
+ },
+ {
+ "id": "custom.align",
+ "value": "right"
+ },
+ {
+ "id": "unit",
+ "value": "celsius"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Avg Temp (C)"
+ },
+ "properties": [
+ {
+ "id": "custom.displayMode",
+ "value": "color-background"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "transparent",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 5
+ },
+ {
+ "color": "yellow",
+ "value": 45
+ },
+ {
+ "color": "orange",
+ "value": 65
+ },
+ {
+ "color": "red",
+ "value": 75
+ }
+ ]
+ }
+ },
+ {
+ "id": "custom.align",
+ "value": "right"
+ },
+ {
+ "id": "unit",
+ "value": "celsius"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Max Fan Speed (rpm)"
+ },
+ "properties": [
+ {
+ "id": "custom.align",
+ "value": "right"
+ },
+ {
+ "id": "unit",
+ "value": "rotrpm"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Avg Fan Speed (rpm)"
+ },
+ "properties": [
+ {
+ "id": "custom.align",
+ "value": "right"
+ },
+ {
+ "id": "unit",
+ "value": "rotrpm"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Min Fan Speed (rpm)"
+ },
+ "properties": [
+ {
+ "id": "custom.align",
+ "value": "right"
+ },
+ {
+ "id": "unit",
+ "value": "rotrpm"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Power"
+ },
+ "properties": [
+ {
+ "id": "custom.align",
+ "value": "right"
+ },
+ {
+ "id": "unit",
+ "value": "watt"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Min Ambient Temp (C)"
+ },
+ "properties": [
+ {
+ "id": "custom.displayMode",
+ "value": "color-background"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "transparent",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 5
+ },
+ {
+ "color": "#eab839",
+ "value": 45
+ },
+ {
+ "color": "orange",
+ "value": 65
+ },
+ {
+ "color": "red",
+ "value": 75
+ }
+ ]
+ }
+ },
+ {
+ "id": "custom.align",
+ "value": "right"
+ },
+ {
+ "id": "unit",
+ "value": "celsius"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Min Temp (C)"
+ },
+ "properties": [
+ {
+ "id": "custom.displayMode",
+ "value": "color-background"
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "transparent",
+ "value": null
+ },
+ {
+ "color": "green",
+ "value": 5
+ },
+ {
+ "color": "yellow",
+ "value": 45
+ },
+ {
+ "color": "orange",
+ "value": 65
+ },
+ {
+ "color": "red",
+ "value": 75
+ }
+ ]
+ }
+ },
+ {
+ "id": "custom.align",
+ "value": "right"
+ },
+ {
+ "id": "unit",
+ "value": "celsius"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 21
+ },
+ "id": 243,
+ "interval": "1m",
+ "maxDataPoints": 2,
+ "options": {
+ "footer": {
+ "fields": "",
+ "reducer": [
+ "sum"
+ ],
+ "show": false
+ },
+ "showHeader": true,
+ "sortBy": []
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "shelf_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity,error_type,error_text) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_disk_count{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "B"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_new_status{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "C"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_power{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "D"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_average_ambient_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "E"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_min_ambient_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "K"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_max_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "F"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_average_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "G"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_min_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "L"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_max_fan_speed{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "H"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_average_fan_speed{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "I"
+ },
+ {
+ "exemplar": false,
+ "expr": "shelf_min_fan_speed{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "J"
+ }
+ ],
+ "title": "Storage Shelf Issues",
+ "transformations": [
+ {
+ "id": "filterFieldsByName",
+ "options": {
+ "include": {
+ "names": [
+ "cluster",
+ "datacenter",
+ "model",
+ "module_type",
+ "op_status",
+ "serial_number",
+ "shelf",
+ "state",
+ "vendor_name",
+ "Value #A",
+ "Value #B",
+ "Value #C",
+ "error_text",
+ "error_type",
+ "severity"
+ ]
+ }
+ }
+ },
+ {
+ "id": "merge",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Value #A": true,
+ "shelf_id": true,
+ "state": true
+ },
+ "indexByName": {
+ "Value #A": 13,
+ "Value #B": 10,
+ "Value #C": 14,
+ "cluster": 1,
+ "datacenter": 0,
+ "error_text": 3,
+ "error_type": 4,
+ "model": 6,
+ "module_type": 9,
+ "op_status": 11,
+ "serial_number": 7,
+ "severity": 5,
+ "shelf": 2,
+ "state": 12,
+ "vendor_name": 8
+ },
+ "renameByName": {
+ "Value #A": "",
+ "Value #B": "disk_count",
+ "Value #C": "state",
+ "Value #D": "Power",
+ "Value #E": "Avg Ambient Temp (C)",
+ "Value #F": "Max Temp (C)",
+ "Value #G": "Avg Temp (C)",
+ "Value #H": "Max Fan Speed (rpm)",
+ "Value #I": "Avg Fan Speed (rpm)",
+ "Value #J": "Min Fan Speed (rpm)",
+ "Value #K": "Min Ambient Temp (C)",
+ "Value #L": "Min Temp (C)",
+ "error_text": "Error",
+ "error_type": "Error Type",
+ "module_type": "",
+ "op_status": "",
+ "severity": "Severity",
+ "shelf_id": ""
+ }
+ }
+ }
+ ],
+ "type": "table"
+ }
+ ],
+ "title": "Shelves",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 30
+ },
+ "id": 266,
+ "panels": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Volumes with abnormal activity",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": "left",
+ "displayMode": "auto",
+ "filterable": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgb(31, 176, 196)",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Value #A"
+ },
+ "properties": [
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "1": {
+ "text": "online"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "from": 0,
+ "result": {
+ "text": "offline"
+ },
+ "to": 0.99
+ },
+ "type": "range"
+ }
+ ]
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgb(83, 179, 59)",
+ "value": null
+ },
+ {
+ "color": "semi-dark-red",
+ "value": 5
+ }
+ ]
+ }
+ },
+ {
+ "id": "custom.displayMode",
+ "value": "color-background-solid"
+ },
+ {
+ "id": "custom.width"
+ },
+ {
+ "id": "displayName",
+ "value": "status"
+ },
+ {
+ "id": "color",
+ "value": {
+ "mode": "thresholds"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Value #D"
+ },
+ "properties": [
+ {
+ "id": "displayName",
+ "value": "space used"
+ },
+ {
+ "id": "unit",
+ "value": "percent"
+ },
+ {
+ "id": "custom.displayMode",
+ "value": "gradient-gauge"
+ },
+ {
+ "id": "max",
+ "value": 100
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Size"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Dedupe Space Saved"
+ },
+ "properties": [
+ {
+ "id": "custom.width"
+ },
+ {
+ "id": "displayName",
+ "value": "Dedupe Space Saved"
+ },
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Compression Space Saved"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Total Space Saved"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Logical Space Used"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Physical Space Used"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 24,
+ "x": 0,
+ "y": 22
+ },
+ "id": 264,
+ "interval": "1m",
+ "maxDataPoints": 2,
+ "options": {
+ "showHeader": true,
+ "sortBy": [
+ {
+ "desc": true,
+ "displayName": "space used"
+ }
+ ]
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "volume_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity,anti_ransomware_attack_probability) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "B"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_new_status{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_size_total{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "C"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_size_used_percent{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "D"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_sis_dedup_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "E"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_sis_compress_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "F"
+ },
+ {
+ "exemplar": false,
+ "expr": "(volume_sis_dedup_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} + volume_sis_compress_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}) * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "G"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_space_logical_used{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "H"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_space_physical_used{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "I"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Volumes with Ransomware Issues (9.10+ Only)",
+ "transformations": [
+ {
+ "id": "filterFieldsByName",
+ "options": {
+ "include": {
+ "names": [
+ "aggr",
+ "node",
+ "svm",
+ "volume",
+ "Value #A",
+ "Value #C",
+ "Value #D",
+ "Value #E",
+ "Value #F",
+ "Value #H",
+ "Value #I",
+ "antiRansomwareState",
+ "anti_ransomware_attack_probability"
+ ]
+ }
+ }
+ },
+ {
+ "id": "merge",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "Value #C": false,
+ "__name__": true,
+ "cluster": true,
+ "datacenter": true,
+ "instance": true,
+ "job": true,
+ "state": true
+ },
+ "indexByName": {
+ "Value #A": 6,
+ "Value #C": 7,
+ "Value #D": 8,
+ "Value #E": 11,
+ "Value #F": 12,
+ "Value #H": 9,
+ "Value #I": 10,
+ "aggr": 3,
+ "antiRansomwareState": 4,
+ "anti_ransomware_attack_probability": 5,
+ "node": 2,
+ "svm": 0,
+ "volume": 1
+ },
+ "renameByName": {
+ "Value #C": "Size",
+ "Value #D": "",
+ "Value #E": "Dedupe Space Saved",
+ "Value #F": "Compression Space Saved",
+ "Value #G": "Total Space Saved",
+ "Value #H": "Logical Space Used",
+ "Value #I": "Physical Space Used",
+ "antiRansomwareState": "Ransomware State",
+ "anti_ransomware_attack_probability": "Ransomware Attack Probability"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "custom": {
+ "align": "left",
+ "displayMode": "auto",
+ "filterable": false
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgb(31, 176, 196)",
+ "value": null
+ }
+ ]
+ },
+ "unit": "none"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Value #A"
+ },
+ "properties": [
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "1": {
+ "text": "online"
+ }
+ },
+ "type": "value"
+ },
+ {
+ "options": {
+ "from": 0,
+ "result": {
+ "text": "offline"
+ },
+ "to": 0.99
+ },
+ "type": "range"
+ }
+ ]
+ },
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgb(83, 179, 59)",
+ "value": null
+ },
+ {
+ "color": "semi-dark-red",
+ "value": 5
+ }
+ ]
+ }
+ },
+ {
+ "id": "custom.displayMode",
+ "value": "color-background-solid"
+ },
+ {
+ "id": "custom.width"
+ },
+ {
+ "id": "displayName",
+ "value": "status"
+ },
+ {
+ "id": "color",
+ "value": {
+ "mode": "thresholds"
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Value #D"
+ },
+ "properties": [
+ {
+ "id": "displayName",
+ "value": "space used"
+ },
+ {
+ "id": "unit",
+ "value": "percent"
+ },
+ {
+ "id": "custom.displayMode",
+ "value": "gradient-gauge"
+ },
+ {
+ "id": "max",
+ "value": 100
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Size"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Dedupe Space Saved"
+ },
+ "properties": [
+ {
+ "id": "custom.width"
+ },
+ {
+ "id": "displayName",
+ "value": "Dedupe Space Saved"
+ },
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Compression Space Saved"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Total Space Saved"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Logical Space Used"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Physical Space Used"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "bytes"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 24,
+ "x": 0,
+ "y": 31
+ },
+ "id": 271,
+ "interval": "1m",
+ "maxDataPoints": 2,
+ "options": {
+ "showHeader": true,
+ "sortBy": [
+ {
+ "desc": true,
+ "displayName": "space used"
+ }
+ ]
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "volume_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity,movement_state) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "B"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_new_status{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_size_total{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "C"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_size_used_percent{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "D"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_sis_dedup_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "E"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_sis_compress_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "F"
+ },
+ {
+ "exemplar": false,
+ "expr": "(volume_sis_dedup_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} + volume_sis_compress_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}) * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "G"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_space_logical_used{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "H"
+ },
+ {
+ "exemplar": false,
+ "expr": "volume_space_physical_used{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "I"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Volumes with Move Issues",
+ "transformations": [
+ {
+ "id": "filterFieldsByName",
+ "options": {
+ "include": {
+ "names": [
+ "aggr",
+ "cluster",
+ "datacenter",
+ "movement_state",
+ "node",
+ "severity",
+ "svm",
+ "volume",
+ "Value #A",
+ "Value #C",
+ "Value #D",
+ "Value #E",
+ "Value #F",
+ "Value #H",
+ "Value #I"
+ ]
+ }
+ }
+ },
+ {
+ "id": "merge",
+ "options": {}
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "Value #A": false,
+ "Value #C": false,
+ "__name__": true,
+ "cluster": false,
+ "datacenter": false,
+ "instance": true,
+ "job": true,
+ "state": true
+ },
+ "indexByName": {
+ "Value #A": 8,
+ "Value #C": 9,
+ "Value #D": 10,
+ "Value #E": 13,
+ "Value #F": 14,
+ "Value #H": 11,
+ "Value #I": 12,
+ "aggr": 7,
+ "cluster": 1,
+ "datacenter": 0,
+ "movement_state": 5,
+ "node": 6,
+ "severity": 4,
+ "svm": 2,
+ "volume": 3
+ },
+ "renameByName": {
+ "Value #C": "Size",
+ "Value #D": "",
+ "Value #E": "Dedupe Space Saved",
+ "Value #F": "Compression Space Saved",
+ "Value #G": "Total Space Saved",
+ "Value #H": "Logical Space Used",
+ "Value #I": "Physical Space Used",
+ "aggr": "Aggregate",
+ "antiRansomwareState": "Ransomware State",
+ "anti_ransomware_attack_probability": "Ransomware Attack Probability",
+ "cluster": "Cluster",
+ "datacenter": "Datacenter",
+ "movement_state": "Movement State",
+ "node": "Node",
+ "severity": "Severity",
+ "svm": "SVM",
+ "volume": "Volume"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ }
+ ],
+ "title": "Volume",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 31
+ },
+ "id": 274,
+ "panels": [
+ {
+ "datasource": null,
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "displayMode": "auto"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 1
+ },
+ "id": 276,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "health_license_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Non Compliant License",
+ "transformations": [
+ {
+ "id": "filterFieldsByName",
+ "options": {
+ "include": {
+ "names": [
+ "cluster",
+ "datacenter",
+ "name",
+ "scope",
+ "severity",
+ "state"
+ ]
+ }
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {},
+ "indexByName": {},
+ "renameByName": {
+ "cluster": "Cluster",
+ "datacenter": "Datacenter",
+ "name": "Name",
+ "scope": "Scope",
+ "severity": "Severity",
+ "state": "State"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ }
+ ],
+ "title": "License",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 32
+ },
+ "id": 255,
+ "panels": [
+ {
+ "datasource": null,
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "displayMode": "auto"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Home?"
+ },
+ "properties": [
+ {
+ "id": "mappings",
+ "value": [
+ {
+ "options": {
+ "false": {
+ "index": 0,
+ "text": "No"
+ }
+ },
+ "type": "value"
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 23
+ },
+ "id": 257,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "health_network_interface_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Network interfaces not at home port",
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "__name__": true,
+ "instance": true,
+ "job": true
+ },
+ "indexByName": {
+ "Time": 0,
+ "Value": 9,
+ "__name__": 1,
+ "cluster": 3,
+ "datacenter": 2,
+ "instance": 7,
+ "isHome": 5,
+ "job": 8,
+ "lif": 4,
+ "severity": 6
+ },
+ "renameByName": {
+ "cluster": "Cluster",
+ "datacenter": "Datacenter",
+ "instance": "",
+ "isHome": "Home?",
+ "lif": "Network Interface",
+ "severity": "Severity"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ }
+ ],
+ "title": "Network Interface",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 33
+ },
+ "id": 259,
+ "panels": [
+ {
+ "datasource": null,
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "displayMode": "auto"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 24
+ },
+ "id": 261,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "health_network_ethernet_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Ethernet ports are down",
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "__name__": true,
+ "instance": true,
+ "job": true
+ },
+ "indexByName": {
+ "Time": 0,
+ "Value": 10,
+ "__name__": 1,
+ "cluster": 3,
+ "datacenter": 2,
+ "instance": 4,
+ "job": 5,
+ "node": 6,
+ "port": 7,
+ "severity": 9,
+ "state": 8
+ },
+ "renameByName": {
+ "cluster": "Cluster",
+ "datacenter": "Datacenter",
+ "node": "Node",
+ "port": "Port",
+ "severity": "Severity",
+ "state": "State"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ },
+ {
+ "datasource": null,
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "displayMode": "auto"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 32
+ },
+ "id": 262,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "health_network_fc_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "FC ports are down",
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "__name__": true,
+ "instance": true,
+ "job": true
+ },
+ "indexByName": {
+ "Time": 0,
+ "Value": 10,
+ "__name__": 1,
+ "cluster": 3,
+ "datacenter": 2,
+ "instance": 4,
+ "job": 5,
+ "node": 6,
+ "port": 7,
+ "severity": 9,
+ "state": 8
+ },
+ "renameByName": {
+ "cluster": "Cluster",
+ "datacenter": "Datacenter",
+ "node": "Node",
+ "port": "Port",
+ "severity": "Severity",
+ "state": "State"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ }
+ ],
+ "title": "Network Port",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 34
+ },
+ "id": 235,
+ "panels": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "$EMSDescription",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "displayMode": "auto"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 35
+ },
+ "id": 237,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "last_over_time(ems_events{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"emergency\"}[$__range]) == 1",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Emergency EMS",
+ "transformations": [
+ {
+ "id": "filterFieldsByName",
+ "options": {
+ "include": {
+ "pattern": "/^(datacenter|cluster|message|node|severity)$/"
+ }
+ }
+ },
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {},
+ "indexByName": {
+ "cluster": 1,
+ "datacenter": 0,
+ "message": 2,
+ "node": 3,
+ "severity": 4
+ },
+ "renameByName": {
+ "cluster": "Cluster",
+ "datacenter": "Datacenter",
+ "message": "Message",
+ "node": "Node",
+ "severity": "Severity"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ }
+ ],
+ "title": "Emergency EMS",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 35
+ },
+ "id": 247,
+ "panels": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "These are the health monitor events that have occurred within the selected time range",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "custom": {
+ "align": "auto",
+ "displayMode": "auto",
+ "filterable": true
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "Datacenter"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "string"
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 36
+ },
+ "id": 249,
+ "options": {
+ "showHeader": true
+ },
+ "pluginVersion": "8.1.8",
+ "targets": [
+ {
+ "exemplar": false,
+ "expr": "last_over_time(health_support_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}[$__range]) == 1",
+ "format": "table",
+ "instant": true,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "System Alerts",
+ "transformations": [
+ {
+ "id": "organize",
+ "options": {
+ "excludeByName": {
+ "Time": true,
+ "Value": true,
+ "__name__": true,
+ "index": true,
+ "instance": true,
+ "job": true,
+ "node": true,
+ "owner_node": true
+ },
+ "indexByName": {
+ "Time": 0,
+ "Value": 6,
+ "__name__": 7,
+ "cluster": 2,
+ "correctiveAction": 12,
+ "datacenter": 1,
+ "instance": 4,
+ "job": 5,
+ "monitor": 9,
+ "name": 8,
+ "node": 13,
+ "reason": 11,
+ "resource": 10,
+ "severity": 3
+ },
+ "renameByName": {
+ "cluster": "Cluster",
+ "container_type": "Container Type",
+ "correctiveAction": "Corrective Action",
+ "datacenter": "Datacenter",
+ "disk": "Disk",
+ "model": "Model",
+ "monitor": "Monitor",
+ "name": "Name",
+ "reason": "Reason",
+ "resource": "Resource",
+ "serial_number": "Serial Number",
+ "severity": "Severity",
+ "shelf": "Shelf"
+ }
+ }
+ }
+ ],
+ "type": "table"
+ }
+ ],
+ "title": "System Health Alerts",
+ "type": "row"
+ }
+ ],
+ "refresh": "",
+ "schemaVersion": 30,
+ "style": "dark",
+ "tags": [
+ "harvest",
+ "ontap",
+ "cdot"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "Prometheus",
+ "value": "Prometheus"
+ },
+ "description": null,
+ "error": null,
+ "hide": 2,
+ "includeAll": false,
+ "label": "Data Source",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(node_labels{system_type!=\"7mode\"},datacenter)",
+ "description": null,
+ "error": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": "",
+ "multi": true,
+ "name": "Datacenter",
+ "options": [],
+ "query": {
+ "query": "label_values(node_labels{system_type!=\"7mode\"},datacenter)",
+ "refId": "Prometheus-Datacenter-Variable-Query"
+ },
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(node_labels{system_type!=\"7mode\",datacenter=~\"$Datacenter\"},cluster)",
+ "description": null,
+ "error": null,
+ "hide": 0,
+ "includeAll": true,
+ "label": "",
+ "multi": true,
+ "name": "Cluster",
+ "options": [],
+ "query": {
+ "query": "label_values(node_labels{system_type!=\"7mode\",datacenter=~\"$Datacenter\"},cluster)",
+ "refId": "StandardVariableQuery"
+ },
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "current": {
+ "selected": false,
+ "text": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range.",
+ "value": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range."
+ },
+ "description": null,
+ "error": null,
+ "hide": 2,
+ "label": null,
+ "name": "EMSDescription",
+ "options": [
+ {
+ "selected": true,
+ "text": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range.",
+ "value": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range."
+ }
+ ],
+ "query": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range.",
+ "skipUrlSync": false,
+ "type": "textbox"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-24h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "timezone": "",
+ "title": "ONTAP: Health",
+ "uid": "",
+ "version": 1
+}
\ No newline at end of file
diff --git a/grafana/dashboards/cmode/nfs_clients.json b/grafana/dashboards/cmode/nfs_clients.json
index 8ee7c7e36..744ab53aa 100644
--- a/grafana/dashboards/cmode/nfs_clients.json
+++ b/grafana/dashboards/cmode/nfs_clients.json
@@ -71,7 +71,7 @@
"gnetId": null,
"graphTooltip": 1,
"id": null,
- "iteration": 1665135798398,
+ "iteration": 1680864207753,
"links": [
{
"asDropdown": true,
@@ -227,8 +227,8 @@
"id": 27,
"options": {
"legend": {
- "displayMode": "hidden",
- "placement": "right",
+ "displayMode": "table",
+ "placement": "bottom",
"values": [
"value"
]
@@ -577,5 +577,5 @@
"timezone": "",
"title": "ONTAP: NFS Clients",
"uid": "",
- "version": 2
-}
+ "version": 3
+}
\ No newline at end of file
diff --git a/integration/test/data/counter_data.go b/integration/test/data/counter_data.go
index f85b42733..d3764d391 100644
--- a/integration/test/data/counter_data.go
+++ b/integration/test/data/counter_data.go
@@ -42,6 +42,8 @@ func GetCounterMap() map[string][]string {
"svm_nfs_read_latency_hist_bucket",
"svm_nfs_write_latency_hist_bucket",
"smb2_",
+ "health_",
+ "ems_events",
}
//if docker.IsDockerBasedPoller() || setup.IsMac {
counterMap["NO_DATA_CONTAINS"] = append(counterMap["NO_DATA_CONTAINS"], "poller", "metadata_exporter_count")