diff --git a/cmd/collectors/commonutils.go b/cmd/collectors/commonutils.go index 71413e03e..847cc7722 100644 --- a/cmd/collectors/commonutils.go +++ b/cmd/collectors/commonutils.go @@ -2,14 +2,19 @@ package collectors import ( "github.com/netapp/harvest/v2/cmd/tools/rest" + "github.com/netapp/harvest/v2/pkg/errs" "github.com/netapp/harvest/v2/pkg/logging" "github.com/netapp/harvest/v2/pkg/matrix" + "github.com/netapp/harvest/v2/pkg/tree/node" "github.com/tidwall/gjson" "strings" "time" ) -const DefaultBatchSize = "500" +const ( + DefaultBatchSize = "500" + MaxAllowedTimeDrift = 10 * time.Second +) func InvokeRestCall(client *rest.Client, href string, logger *logging.Logger) ([]gjson.Result, error) { result, err := rest.Fetch(client, href) @@ -25,6 +30,70 @@ func InvokeRestCall(client *rest.Client, href string, logger *logging.Logger) ([ return result, nil } +func GetClusterTime(client *rest.Client, returnTimeOut string, logger *logging.Logger) (time.Time, error) { + var ( + err error + records []gjson.Result + clusterTime time.Time + timeOfNodes []int64 + ) + + query := "private/cli/cluster/date" + fields := []string{"date"} + + href := rest.BuildHref(query, strings.Join(fields, ","), nil, "", "", "1", returnTimeOut, "") + + if records, err = rest.Fetch(client, href); err != nil { + return clusterTime, err + } + if len(records) == 0 { + return clusterTime, errs.New(errs.ErrConfig, " date not found on cluster") + } + + for _, instanceData := range records { + currentClusterDate := instanceData.Get("date") + if currentClusterDate.Exists() { + t, err := time.Parse(time.RFC3339, currentClusterDate.String()) + if err != nil { + logger.Error().Str("date", currentClusterDate.String()).Err(err).Msg("Failed to load cluster date") + continue + } + clusterTime = t + timeOfNodes = append(timeOfNodes, t.UnixNano()) + } + } + + for _, timeOfEachNode := range timeOfNodes { + timeDrift := time.Duration(timeOfEachNode - timeOfNodes[0]).Abs() + if timeDrift >= MaxAllowedTimeDrift { + logger.Warn().Float64("timedrift(in sec)", timeDrift.Seconds()).Msg("Time drift exist among the nodes") + break + } + } + + logger.Debug().Str("cluster time", clusterTime.String()).Msg("") + return clusterTime, nil +} + +// GetDataInterval fetch pollData interval +func GetDataInterval(param *node.Node, defaultInterval time.Duration) (time.Duration, error) { + var dataIntervalStr string + var durationVal time.Duration + var err error + schedule := param.GetChildS("schedule") + if schedule != nil { + dataInterval := schedule.GetChildS("data") + if dataInterval != nil { + dataIntervalStr = dataInterval.GetContentS() + if durationVal, err = time.ParseDuration(dataIntervalStr); err == nil { + return durationVal, nil + } + return defaultInterval, err + } + } + return defaultInterval, nil +} + func UpdateProtectedFields(instance *matrix.Instance) { // check for group_type diff --git a/cmd/collectors/commonutils_test.go b/cmd/collectors/commonutils_test.go index 03a5034eb..db9421049 100644 --- a/cmd/collectors/commonutils_test.go +++ b/cmd/collectors/commonutils_test.go @@ -2,6 +2,7 @@ package collectors import ( "github.com/netapp/harvest/v2/pkg/matrix" + "github.com/netapp/harvest/v2/pkg/tree/node" "testing" "time" ) @@ -256,3 +257,43 @@ func testNewerTimestampThanDuration(t *testing.T) { t.Errorf("timestamp= %f is newer than duration %s", timestamp, duration.String()) } } + +func TestGetDataInterval(t *testing.T) { + defaultDataPollDuration := 3 * time.Minute + type args struct { + param *node.Node + defaultInterval time.Duration + } + + type test struct { + name string + args args + want float64 + wantErr bool + } + tests := []test{ + {"success_return_poller_schedule", args{param: generateScheduleParam("4m"), defaultInterval: defaultDataPollDuration}, 240, false}, + {"error_return_default_schedule", args{param: generateScheduleParam("4ma"), defaultInterval: defaultDataPollDuration}, 180, true}, + {"return_default_schedule", args{param: generateScheduleParam(""), defaultInterval: defaultDataPollDuration}, 180, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := GetDataInterval(tt.args.param, tt.args.defaultInterval) + if (err != nil) != tt.wantErr { + t.Errorf("GetDataInterval() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got.Seconds() != tt.want { + t.Errorf("GetDataInterval() got = %v, want %v", got, tt.want) + } + }) + } +} + +func generateScheduleParam(duration string) *node.Node { + root := node.NewS("root") + param := root.NewChildS("schedule", "") + param.NewChildS("data", duration) + return root +} diff --git a/cmd/collectors/ems/ems.go b/cmd/collectors/ems/ems.go index 8d4d6762a..e3dc3edac 100644 --- a/cmd/collectors/ems/ems.go +++ b/cmd/collectors/ems/ems.go @@ -24,7 +24,6 @@ const defaultSeverityFilter = "alert|emergency|error|informational|notice" const MaxBookendInstances = 1000 const DefaultBookendResolutionDuration = 28 * 24 * time.Hour // 28 days == 672 hours const Hyphen = "-" -const MaxAllowedTimeDrift = 10 * time.Second type Ems struct { *rest2.Rest // provides: AbstractCollector, Client, Object, Query, TemplateFn, TemplateType @@ -248,58 +247,13 @@ func (e *Ems) InitCache() error { return nil } -func (e *Ems) getClusterTime() (time.Time, error) { - var ( - err error - records []gjson.Result - clusterTime time.Time - timeOfNodes []int64 - ) - - query := "private/cli/cluster/date" - fields := []string{"date"} - - href := rest.BuildHref(query, strings.Join(fields, ","), nil, "", "", "1", e.ReturnTimeOut, "") - - if records, err = e.GetRestData(href); err != nil { - return clusterTime, err - } - if len(records) == 0 { - return clusterTime, errs.New(errs.ErrConfig, e.Object+" date not found on cluster") - } - - for _, instanceData := range records { - currentClusterDate := instanceData.Get("date") - if currentClusterDate.Exists() { - t, err := time.Parse(time.RFC3339, currentClusterDate.String()) - if err != nil { - e.Logger.Error().Str("date", currentClusterDate.String()).Err(err).Msg("Failed to load cluster date") - continue - } - clusterTime = t - timeOfNodes = append(timeOfNodes, t.UnixNano()) - } - } - - for _, timeOfEachNode := range timeOfNodes { - timeDrift := time.Duration(timeOfEachNode - timeOfNodes[0]).Abs() - if timeDrift >= MaxAllowedTimeDrift { - e.Logger.Warn().Float64("timedrift(in sec)", timeDrift.Seconds()).Msg("Time drift exist among the nodes") - break - } - } - - e.Logger.Debug().Str("cluster time", clusterTime.String()).Msg("") - return clusterTime, nil -} - // returns time filter (clustertime - polldata duration) func (e *Ems) getTimeStampFilter(clusterTime time.Time) string { fromTime := e.lastFilterTime // check if this is the first request if e.lastFilterTime == 0 { // if first request fetch cluster time - dataDuration, err := GetDataInterval(e.GetParams(), defaultDataPollDuration) + dataDuration, err := collectors.GetDataInterval(e.GetParams(), defaultDataPollDuration) if err != nil { e.Logger.Warn().Err(err). Str("defaultDataPollDuration", defaultDataPollDuration.String()). @@ -421,7 +375,7 @@ func (e *Ems) PollData() (map[string]*matrix.Matrix, error) { startTime = time.Now() // add time filter - clusterTime, err := e.getClusterTime() + clusterTime, err := collectors.GetClusterTime(e.Client, e.ReturnTimeOut, e.Logger) if err != nil { return nil, err } @@ -507,25 +461,6 @@ func (e *Ems) getHref(names []string, filter []string) string { return href } -// GetDataInterval fetch pollData interval -func GetDataInterval(param *node.Node, defaultInterval time.Duration) (time.Duration, error) { - var dataIntervalStr string - var durationVal time.Duration - var err error - schedule := param.GetChildS("schedule") - if schedule != nil { - dataInterval := schedule.GetChildS("data") - if dataInterval != nil { - dataIntervalStr = dataInterval.GetContentS() - if durationVal, err = time.ParseDuration(dataIntervalStr); err == nil { - return durationVal, nil - } - return defaultInterval, err - } - } - return defaultInterval, nil -} - func parseProperties(instanceData gjson.Result, property string) gjson.Result { if !strings.HasPrefix(property, "parameters.") { diff --git a/cmd/collectors/rest/plugins/health/health.go b/cmd/collectors/rest/plugins/health/health.go new file mode 100644 index 000000000..a8f5d098d --- /dev/null +++ b/cmd/collectors/rest/plugins/health/health.go @@ -0,0 +1,717 @@ +package health + +import ( + "fmt" + goversion "github.com/hashicorp/go-version" + "github.com/netapp/harvest/v2/cmd/collectors" + "github.com/netapp/harvest/v2/cmd/poller/plugin" + "github.com/netapp/harvest/v2/cmd/tools/rest" + "github.com/netapp/harvest/v2/pkg/conf" + "github.com/netapp/harvest/v2/pkg/errs" + "github.com/netapp/harvest/v2/pkg/matrix" + "github.com/tidwall/gjson" + "strconv" + "strings" + "time" +) + +type AlertSeverity string + +const ( + errr AlertSeverity = "error" + warning AlertSeverity = "warning" + diskHealthMatrix = "health_disk" + shelfHealthMatrix = "health_shelf" + supportHealthMatrix = "health_support" + nodeHealthMatrix = "health_node" + networkEthernetPortHealthMatrix = "health_network_ethernet_port" + networkFCPortHealthMatrix = "health_network_fc_port" + networkInterfaceHealthMatrix = "health_network_interface" + volumeRansomwareHealthMatrix = "health_volume_ransomware" + volumeMoveHealthMatrix = "health_volume_move" + licenseHealthMatrix = "health_license" + severityLabel = "severity" + defaultDataPollDuration = 3 * time.Minute +) + +type Health struct { + *plugin.AbstractPlugin + client *rest.Client + data map[string]*matrix.Matrix + lastFilterTime int64 +} + +func New(p *plugin.AbstractPlugin) plugin.Plugin { + return &Health{AbstractPlugin: p} +} + +var metrics = []string{ + "alerts", +} + +func (h *Health) Init() error { + + var err error + + if err = h.InitAbc(); err != nil { + return err + } + + if err = h.initAllMatrix(); err != nil { + return err + } + + timeout, _ := time.ParseDuration(rest.DefaultTimeout) + if h.client, err = rest.New(conf.ZapiPoller(h.ParentParams), timeout, h.Auth); err != nil { + return err + } + + if err = h.client.Init(5); err != nil { + return err + } + + return nil +} + +func (h *Health) initAllMatrix() error { + h.data = make(map[string]*matrix.Matrix) + mats := []string{diskHealthMatrix, shelfHealthMatrix, supportHealthMatrix, nodeHealthMatrix, + networkEthernetPortHealthMatrix, networkFCPortHealthMatrix, networkInterfaceHealthMatrix, + volumeRansomwareHealthMatrix, volumeMoveHealthMatrix, licenseHealthMatrix} + for _, m := range mats { + if err := h.initMatrix(m); err != nil { + return err + } + } + return nil +} + +func (h *Health) initMatrix(name string) error { + h.data[name] = matrix.New(h.Parent+name, name, name) + for _, v1 := range h.data { + v1.SetExportOptions(matrix.DefaultExportOptions()) + } + for _, k := range metrics { + err := matrix.CreateMetric(k, h.data[name]) + if err != nil { + h.Logger.Warn().Err(err).Str("key", k).Msg("error while creating metric") + return err + } + } + return nil +} + +func (h *Health) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, error) { + data := dataMap[h.Object] + clusterVersion := h.client.Cluster().GetVersion() + ontapVersion, err := goversion.NewVersion(clusterVersion) + if err != nil { + h.Logger.Error().Err(err). + Str("version", clusterVersion). + Msg("Failed to parse version") + return nil, nil + } + version96 := "9.6" + version96After, err := goversion.NewVersion(version96) + if err != nil { + h.Logger.Error().Err(err). + Str("version", version96). + Msg("Failed to parse version") + return nil, nil + } + + if ontapVersion.LessThan(version96After) { + return nil, nil + } + + // Purge and reset data + // remove all metrics as analytics label may change over time + err = h.initAllMatrix() + if err != nil { + h.Logger.Warn().Err(err).Msg("error while init matrix") + return nil, err + } + for k := range h.data { + // Set all global labels if already not exist + h.data[k].SetGlobalLabels(data.GetGlobalLabels()) + } + + h.collectDiskAlerts() + h.collectShelfAlerts() + h.collectSupportAlerts() + h.collectNodeAlerts() + h.collectNetworkEthernetPortAlerts() + h.collectNetworkFCPortAlerts() + h.collectNetworkInterfacesAlerts() + h.collectVolumeRansomwareAlerts() + h.collectVolumeMoveAlerts() + h.collectLicenseAlerts() + + result := make([]*matrix.Matrix, 0, len(h.data)) + + for _, value := range h.data { + result = append(result, value) + } + return result, nil +} + +func (h *Health) collectLicenseAlerts() { + var ( + instance *matrix.Instance + ) + + records, err := h.getNonCompliantLicense() + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[licenseHealthMatrix] + for _, record := range records { + name := record.Get("name").String() + scope := record.Get("scope").String() + state := record.Get("state").String() + instance, err = mat.NewInstance(name) + if err != nil { + h.Logger.Warn().Str("key", name).Msg("error while creating instance") + continue + } + instance.SetLabel("name", name) + instance.SetLabel("scope", scope) + instance.SetLabel("state", state) + instance.SetLabel(severityLabel, string(errr)) + + h.setAlertMetric(mat, instance) + } +} + +func (h *Health) collectVolumeMoveAlerts() { + var ( + instance *matrix.Instance + ) + + records, err := h.getMoveFailedVolumes() + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[volumeMoveHealthMatrix] + for _, record := range records { + uuid := record.Get("uuid").String() + volume := record.Get("name").String() + svm := record.Get("svm.name").String() + movementState := record.Get("movement.state").String() + instance, err = mat.NewInstance(uuid) + if err != nil { + h.Logger.Warn().Str("key", uuid).Msg("error while creating instance") + continue + } + instance.SetLabel("movement_state", movementState) + instance.SetLabel("svm", svm) + instance.SetLabel("volume", volume) + instance.SetLabel(severityLabel, string(warning)) + + h.setAlertMetric(mat, instance) + } +} + +func (h *Health) collectVolumeRansomwareAlerts() { + var ( + instance *matrix.Instance + ) + clusterVersion := h.client.Cluster().GetVersion() + ontapVersion, err := goversion.NewVersion(clusterVersion) + if err != nil { + h.Logger.Error().Err(err). + Str("version", clusterVersion). + Msg("Failed to parse version") + return + } + version910 := "9.10" + version910After, err := goversion.NewVersion(version910) + if err != nil { + h.Logger.Error().Err(err). + Str("version", version910). + Msg("Failed to parse version") + return + } + + if ontapVersion.LessThan(version910After) { + return + } + records, err := h.getRansomwareVolumes() + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[volumeRansomwareHealthMatrix] + for _, record := range records { + uuid := record.Get("uuid").String() + volume := record.Get("name").String() + antiRansomwareAttackProbability := record.Get("anti_ransomware.attack_probability").String() + instance, err = mat.NewInstance(uuid) + if err != nil { + h.Logger.Warn().Str("key", uuid).Msg("error while creating instance") + continue + } + instance.SetLabel("anti_ransomware_attack_probability", antiRansomwareAttackProbability) + + instance.SetLabel("volume", volume) + instance.SetLabel(severityLabel, string(errr)) + + h.setAlertMetric(mat, instance) + } +} + +func (h *Health) collectNetworkInterfacesAlerts() { + var ( + instance *matrix.Instance + ) + records, err := h.getNonHomeLIFs() + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[networkInterfaceHealthMatrix] + for _, record := range records { + uuid := record.Get("uuid").String() + lif := record.Get("name").String() + isHome := record.Get("location.is_home").String() + instance, err = mat.NewInstance(uuid) + if err != nil { + h.Logger.Warn().Str("key", uuid).Msg("error while creating instance") + continue + } + instance.SetLabel("isHome", isHome) + instance.SetLabel("lif", lif) + instance.SetLabel(severityLabel, string(warning)) + + h.setAlertMetric(mat, instance) + } +} + +func (h *Health) collectNetworkFCPortAlerts() { + var ( + instance *matrix.Instance + ) + records, err := h.getFCPorts() + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[networkFCPortHealthMatrix] + for _, record := range records { + uuid := record.Get("uuid").String() + nodeName := record.Get("node.name").String() + port := record.Get("name").String() + state := record.Get("state").String() + instance, err = mat.NewInstance(uuid) + if err != nil { + h.Logger.Warn().Str("key", uuid).Msg("error while creating instance") + continue + } + instance.SetLabel("node", nodeName) + instance.SetLabel("state", state) + instance.SetLabel("port", port) + instance.SetLabel(severityLabel, string(errr)) + + h.setAlertMetric(mat, instance) + } +} + +func (h *Health) collectNetworkEthernetPortAlerts() { + var ( + instance *matrix.Instance + ) + records, err := h.getEthernetPorts() + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[networkEthernetPortHealthMatrix] + for _, record := range records { + uuid := record.Get("uuid").String() + port := record.Get("name").String() + nodeName := record.Get("node.name").String() + portType := record.Get("type").String() + state := record.Get("state").String() + instance, err = mat.NewInstance(uuid) + if err != nil { + h.Logger.Warn().Str("key", uuid).Msg("error while creating instance") + continue + } + instance.SetLabel("node", nodeName) + instance.SetLabel("state", state) + instance.SetLabel("port", port) + instance.SetLabel("type", portType) + instance.SetLabel(severityLabel, string(errr)) + + h.setAlertMetric(mat, instance) + } +} + +func (h *Health) collectNodeAlerts() { + var ( + instance *matrix.Instance + ) + records, err := h.getNodes() + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[nodeHealthMatrix] + for _, record := range records { + nodeName := record.Get("node").String() + + instance, err = mat.NewInstance(nodeName) + if err != nil { + h.Logger.Warn().Str("key", nodeName).Msg("error while creating instance") + continue + } + instance.SetLabel("node", nodeName) + instance.SetLabel("healthy", "false") + instance.SetLabel(severityLabel, string(errr)) + + h.setAlertMetric(mat, instance) + } +} + +func (h *Health) collectShelfAlerts() { + var ( + instance *matrix.Instance + ) + records, err := h.getShelves() + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[shelfHealthMatrix] + for _, record := range records { + shelf := record.Get("shelf").String() + errorType := record.Get("error_type").String() + errorSeverity := record.Get("error_severity").String() + errorText := record.Get("error_text").String() + + //errorSeverity possible values are unknown|notice|warning|error|critical + if errorSeverity == "error" || errorSeverity == "critical" || errorSeverity == "warning" { + instance, err = mat.NewInstance(shelf) + if err != nil { + h.Logger.Warn().Str("key", shelf).Msg("error while creating instance") + continue + } + instance.SetLabel("shelf", shelf) + instance.SetLabel("error_type", errorType) + instance.SetLabel("error_text", errorText) + if errorSeverity == "error" || errorSeverity == "critical" { + instance.SetLabel(severityLabel, string(errr)) + } else if errorSeverity == "warning" { + instance.SetLabel(severityLabel, string(warning)) + } + + h.setAlertMetric(mat, instance) + } + } +} + +func (h *Health) collectSupportAlerts() { + var ( + instance *matrix.Instance + ) + clusterTime, err := collectors.GetClusterTime(h.client, "", h.Logger) + if err != nil { + h.Logger.Error().Err(err).Msg("Failed to collect cluster time") + return + } + toTime := clusterTime.Unix() + timeFilter := h.getTimeStampFilter(clusterTime) + addFilter := []string{"suppress=false"} + filter := append(addFilter, timeFilter) + + records, err := h.getSupportAlerts(filter) + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[supportHealthMatrix] + for index, record := range records { + nodeName := record.Get("node.name").String() + monitor := record.Get("monitor").String() + name := record.Get("name").String() + resource := record.Get("resource").String() + reason := record.Get("cause.message").String() + correctiveAction := record.Get("corrective_action.message").String() + instance, err = mat.NewInstance(strconv.Itoa(index)) + if err != nil { + h.Logger.Warn().Int("key", index).Msg("error while creating instance") + continue + } + instance.SetLabel("node", nodeName) + instance.SetLabel("monitor", monitor) + instance.SetLabel("name", name) + instance.SetLabel("resource", resource) + instance.SetLabel("reason", reason) + instance.SetLabel("correctiveAction", correctiveAction) + instance.SetLabel(severityLabel, string(warning)) + + h.setAlertMetric(mat, instance) + } + // update lastFilterTime to current cluster time + h.lastFilterTime = toTime +} + +func (h *Health) collectDiskAlerts() { + var ( + instance *matrix.Instance + ) + records, err := h.getDisks() + if err != nil { + if errs.IsRestErr(err, errs.APINotFound) { + h.Logger.Debug().Err(err).Msg("API not found") + } else { + h.Logger.Error().Err(err).Msg("Failed to collect analytic data") + } + return + } + mat := h.data[diskHealthMatrix] + for _, record := range records { + name := record.Get("name").String() + containerType := record.Get("container_type").String() + instance, err = mat.NewInstance(name) + if err != nil { + h.Logger.Warn().Str("key", name).Msg("error while creating instance") + continue + } + instance.SetLabel("disk", name) + instance.SetLabel("container_type", containerType) + if containerType == "broken" { + instance.SetLabel(severityLabel, string(errr)) + } else if containerType == "unassigned" { + instance.SetLabel(severityLabel, string(warning)) + } + + h.setAlertMetric(mat, instance) + } +} + +func (h *Health) getDisks() ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + + fields := []string{"name", "container_type"} + query := "api/storage/disks" + href := rest.BuildHref(query, strings.Join(fields, ","), []string{"container_type=broken|unassigned"}, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + return result, nil +} + +func (h *Health) getShelves() ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + + fields := []string{"error_type", "error_severity", "error_text"} + query := "api/private/cli/storage/shelf" + href := rest.BuildHref(query, strings.Join(fields, ","), nil, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + return result, nil +} + +func (h *Health) getNodes() ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + + fields := []string{"health"} + query := "api/private/cli/node" + href := rest.BuildHref(query, strings.Join(fields, ","), []string{"health=false"}, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + return result, nil +} + +func (h *Health) getRansomwareVolumes() ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + + query := "api/storage/volumes" + href := rest.BuildHref(query, "", []string{"anti_ransomware.state=enabled", "anti_ransomware.attack_probability=low|moderate|high"}, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + return result, nil +} + +func (h *Health) getNonCompliantLicense() ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + + query := "api/cluster/licensing/licenses" + fields := []string{"name,scope,state"} + href := rest.BuildHref(query, strings.Join(fields, ","), []string{"state=noncompliant"}, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + return result, nil +} + +func (h *Health) getMoveFailedVolumes() ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + + query := "api/storage/volumes" + fields := []string{"uuid,name,movement.state,svm"} + href := rest.BuildHref(query, strings.Join(fields, ","), []string{"movement.state=cutover_wait|failed|cutover_pending"}, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + return result, nil +} + +func (h *Health) getNonHomeLIFs() ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + + query := "api/network/ip/interfaces" + href := rest.BuildHref(query, "", []string{"location.is_home=false"}, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + return result, nil +} + +func (h *Health) getFCPorts() ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + + fields := []string{"name,node"} + query := "api/network/fc/ports" + href := rest.BuildHref(query, strings.Join(fields, ","), []string{"enabled=true", "state=offlined_by_system"}, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + return result, nil +} + +func (h *Health) getEthernetPorts() ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + + fields := []string{"name,node"} + query := "api/network/ethernet/ports" + href := rest.BuildHref(query, strings.Join(fields, ","), []string{"enabled=true", "state=down"}, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + return result, nil +} + +func (h *Health) getSupportAlerts(filter []string) ([]gjson.Result, error) { + var ( + result []gjson.Result + err error + ) + query := "api/private/support/alerts" + href := rest.BuildHref(query, "", filter, "", "", "", "", query) + + if result, err = collectors.InvokeRestCall(h.client, href, h.Logger); err != nil { + return nil, err + } + + return result, nil +} + +// returns time filter (clustertime - polldata duration) +func (h *Health) getTimeStampFilter(clusterTime time.Time) string { + fromTime := h.lastFilterTime + // check if this is the first request + if h.lastFilterTime == 0 { + // if first request fetch cluster time + dataDuration, err := collectors.GetDataInterval(h.ParentParams, defaultDataPollDuration) + if err != nil { + h.Logger.Warn().Err(err). + Str("defaultDataPollDuration", defaultDataPollDuration.String()). + Msg("Failed to parse duration. using default") + } + fromTime = clusterTime.Add(-dataDuration).Unix() + } + return fmt.Sprintf("time=>=%d", fromTime) +} + +func (h *Health) setAlertMetric(mat *matrix.Matrix, instance *matrix.Instance) { + var err error + m := mat.GetMetric("alerts") + if m == nil { + if m, err = mat.NewMetricFloat64("alerts"); err != nil { + h.Logger.Warn().Err(err).Str("key", "alerts").Msg("error while creating metric") + return + } + } + if err = m.SetValueFloat64(instance, 1); err != nil { + h.Logger.Error().Err(err).Str("metric", "alerts").Msg("Unable to set value on metric") + } +} diff --git a/cmd/collectors/rest/rest.go b/cmd/collectors/rest/rest.go index 056784a3d..efed92b17 100644 --- a/cmd/collectors/rest/rest.go +++ b/cmd/collectors/rest/rest.go @@ -4,6 +4,7 @@ import ( "fmt" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/certificate" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/disk" + "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/health" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/netroute" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/qospolicyadaptive" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/qospolicyfixed" @@ -369,6 +370,8 @@ func (r *Rest) LoadPlugin(kind string, abc *plugin.AbstractPlugin) plugin.Plugin switch kind { case "Disk": return disk.New(abc) + case "Health": + return health.New(abc) case "NetRoute": return netroute.New(abc) case "Qtree": diff --git a/cmd/tools/grafana/dashboard_test.go b/cmd/tools/grafana/dashboard_test.go index 315afba12..cb258f8e0 100644 --- a/cmd/tools/grafana/dashboard_test.go +++ b/cmd/tools/grafana/dashboard_test.go @@ -311,7 +311,7 @@ func doPanel(t *testing.T, pathPrefix string, key gjson.Result, value gjson.Resu numExpressions := len(expressions) for _, e := range expressions { // Ignore labels and _status - if strings.HasSuffix(e.metric, "_labels") || strings.HasSuffix(e.metric, "_status") { + if strings.HasSuffix(e.metric, "_labels") || strings.HasSuffix(e.metric, "_status") || strings.HasSuffix(e.metric, "_events") || strings.HasSuffix(e.metric, "_alerts") { continue } unit := unitForExpr(e, overrides, defaultUnit, valueToName, numExpressions) @@ -615,6 +615,7 @@ func TestOnlyHighlightsExpanded(t *testing.T) { "cmode/fsa.json": 2, "cmode/workload.json": 2, "cmode/smb2.json": 2, + "cmode/health.json": 2, } // count number of expanded sections in dashboard and ensure num expanded = 1 visitDashboards( diff --git a/conf/rest/9.12.0/node.yaml b/conf/rest/9.12.0/node.yaml index d77e503ed..397a1434a 100644 --- a/conf/rest/9.12.0/node.yaml +++ b/conf/rest/9.12.0/node.yaml @@ -11,6 +11,7 @@ counters: - ^location - ^model - ^serial_number => serial + - ^state - ^version.full => version - controller.failed_fan.count => failed_fan - controller.failed_power_supply.count => failed_power @@ -47,5 +48,6 @@ export_options: - model - serial - uptime + - state - vendor - version diff --git a/conf/rest/9.6.0/health.yaml b/conf/rest/9.6.0/health.yaml new file mode 100644 index 000000000..41c0fe245 --- /dev/null +++ b/conf/rest/9.6.0/health.yaml @@ -0,0 +1,12 @@ +name: Health +query: api/cluster +object: health + +counters: + - ^^uuid + - ^name + +plugins: + - Health + +export_data: false diff --git a/conf/rest/default.yaml b/conf/rest/default.yaml index 95ef33a1b..bedb4e2fa 100644 --- a/conf/rest/default.yaml +++ b/conf/rest/default.yaml @@ -13,6 +13,7 @@ objects: ClusterPeer: clusterpeer.yaml Disk: disk.yaml # ExportRule: exports.yaml + Health: health.yaml Lun: lun.yaml Namespace: namespace.yaml NetConnections: netconnections.yaml diff --git a/grafana/dashboards/cmode/health.json b/grafana/dashboards/cmode/health.json new file mode 100644 index 000000000..6f9f3dbbd --- /dev/null +++ b/grafana/dashboards/cmode/health.json @@ -0,0 +1,3219 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.1.8" + }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "", + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "iteration": 1681133194625, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "cdot" + ], + "targetBlank": false, + "title": "Related Dashboards", + "tooltip": "", + "type": "dashboards", + "url": "" + } + ], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 239, + "panels": [], + "title": "Important Information about Health Dashboard", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 241, + "options": { + "content": "This dashboard requires ONTAP 9.6+ and the REST collector. Two actions are required to use this dashboard:
1. Enable the REST collector in your harvest.yml config
2. Enable the EMS collector in your harvest.yml config for EMS events
", + "mode": "markdown" + }, + "pluginVersion": "8.1.8", + "type": "text" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 225, + "panels": [], + "title": "Highlights", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Volumes protected" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Volumes not protected" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 5 + }, + "id": 277, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "(count(health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_node_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_network_fc_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_network_ethernet_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_license_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Total Errors", + "transformations": [], + "type": "stat" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "orange", + "value": null + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Volumes protected" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Volumes not protected" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 5 + }, + "id": 278, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "(count(health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0))\n+\n(count(health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0))\n+\n(count(last_over_time(health_support_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}[24h]) == 1) or vector(0))\n+\n(count(health_network_interface_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0))\n+\n(count(health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0))\n+\n(count(health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Total Warnings", + "transformations": [], + "type": "stat" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "$EMSDescription", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Volumes not protected" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 5 + }, + "id": 270, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "(count(last_over_time(ems_events{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"emergency\"}[$__range]) == 1) or vector(0))", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Total Active Emergency EMS", + "transformations": [], + "type": "stat" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "locale" + }, + "overrides": [] + }, + "gridPos": { + "h": 16, + "w": 8, + "x": 0, + "y": 11 + }, + "id": 268, + "options": { + "displayLabels": [ + "value" + ], + "legend": { + "displayMode": "table", + "placement": "bottom", + "values": [ + "value" + ] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "exemplar": false, + "expr": "count(health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)", + "instant": true, + "interval": "", + "legendFormat": "Broken Disk", + "refId": "A" + }, + { + "exemplar": false, + "expr": "count(health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Shelf Error", + "refId": "B" + }, + { + "exemplar": false, + "expr": "count(health_node_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Node Down", + "refId": "D" + }, + { + "exemplar": false, + "expr": "(count(health_network_fc_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))\n+\n(count(health_network_ethernet_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0))", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Network Port Down", + "refId": "E" + }, + { + "exemplar": false, + "expr": "count(health_license_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "License Non Compliant", + "refId": "C" + } + ], + "title": "Errors", + "transformations": [], + "type": "piechart" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Volumes protected" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Volumes not protected" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 8, + "x": 8, + "y": 11 + }, + "id": 269, + "options": { + "displayLabels": [ + "value" + ], + "legend": { + "displayMode": "table", + "placement": "bottom", + "values": [ + "value" + ] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.2", + "targets": [ + { + "exemplar": false, + "expr": "count(health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0)", + "instant": true, + "interval": "", + "legendFormat": "Unassigned Disk", + "refId": "A" + }, + { + "exemplar": false, + "expr": "count(health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Shelf Warning", + "refId": "B" + }, + { + "exemplar": false, + "expr": "(count(last_over_time(health_support_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}[24h]) == 1) or vector(0))", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Health Monitor Alerts (last 24h)", + "refId": "C" + }, + { + "exemplar": false, + "expr": "count(health_network_interface_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Network Interface are not at home port", + "refId": "D" + }, + { + "exemplar": false, + "expr": "count(health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"warning\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Volume Move Alerts", + "refId": "E" + }, + { + "exemplar": false, + "expr": "count(health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"error\"}) or vector(0)", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Volume Ransomware (9.10+)", + "refId": "F" + } + ], + "title": "Warnings", + "transformations": [], + "type": "piechart" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "$EMSDescription", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "locale" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Volumes protected" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Volumes not protected" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 8, + "x": 16, + "y": 11 + }, + "id": 272, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "count(last_over_time(ems_events{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"emergency\"}[$__range]) == 1) by (message)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Active Emergency EMS", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": { + "Value": "Count", + "message": "EMS" + } + } + } + ], + "type": "table" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 251, + "panels": [ + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Healthy" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "false": { + "index": 0, + "text": "No" + } + }, + "type": "value" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 253, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "node_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(cluster,node,datacenter) group_left(severity) health_node_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Node Issues", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "instance": true, + "job": true + }, + "indexByName": { + "Time": 0, + "Value": 10, + "cluster": 1, + "datacenter": 2, + "healthy": 4, + "instance": 8, + "job": 9, + "node": 3, + "severity": 6, + "state": 5, + "version": 7 + }, + "renameByName": { + "cluster": "Cluster", + "datacenter": "Datacenter", + "healthy": "Healthy", + "node": "Node", + "severity": "Severity", + "state": "State", + "version": "Version" + } + } + } + ], + "type": "table" + } + ], + "title": "Node", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 230, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": true + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Datacenter" + }, + "properties": [ + { + "id": "unit", + "value": "string" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Shelf" + }, + "properties": [ + { + "id": "custom.width", + "value": null + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 248, + "options": { + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "disk_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(disk,cluster,datacenter) group_left(severity) health_disk_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Disks Issues", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "failed": true, + "index": true, + "instance": true, + "job": true, + "node": true, + "owner_node": true + }, + "indexByName": { + "Time": 0, + "Value": 11, + "cluster": 2, + "container_type": 5, + "datacenter": 1, + "disk": 3, + "instance": 9, + "job": 10, + "model": 8, + "serial_number": 7, + "severity": 4, + "shelf": 6 + }, + "renameByName": { + "cluster": "Cluster", + "container_type": "Container Type", + "datacenter": "Datacenter", + "disk": "Disk", + "model": "Model", + "serial_number": "Serial Number", + "severity": "Severity", + "shelf": "Shelf" + } + } + } + ], + "type": "table" + } + ], + "title": "Disks", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 245, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "displayMode": "auto", + "filterable": true + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-yellow", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "disk_count" + }, + "properties": [ + { + "id": "unit", + "value": "locale" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "shelf" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "json-view" + }, + { + "id": "custom.width" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "state" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(224, 47, 47)", + "value": null + }, + { + "color": "rgb(118, 204, 49)", + "value": 1 + } + ] + } + }, + { + "id": "mappings", + "value": [ + { + "options": { + "1": { + "text": "ONLINE" + } + }, + "type": "value" + }, + { + "options": { + "from": 0, + "result": { + "text": "OFFLINE" + }, + "to": 0.999 + }, + "type": "range" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Ambient Temp (C)" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "green", + "value": 5 + }, + { + "color": "#EAB839", + "value": 45 + }, + { + "color": "orange", + "value": 65 + }, + { + "color": "red", + "value": 75 + } + ] + } + }, + { + "id": "custom.align", + "value": "right" + }, + { + "id": "unit", + "value": "celsius" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max Temp (C)" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "green", + "value": 5 + }, + { + "color": "yellow", + "value": 45 + }, + { + "color": "orange", + "value": 65 + }, + { + "color": "red", + "value": 75 + } + ] + } + }, + { + "id": "custom.align", + "value": "right" + }, + { + "id": "unit", + "value": "celsius" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Temp (C)" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "green", + "value": 5 + }, + { + "color": "yellow", + "value": 45 + }, + { + "color": "orange", + "value": 65 + }, + { + "color": "red", + "value": 75 + } + ] + } + }, + { + "id": "custom.align", + "value": "right" + }, + { + "id": "unit", + "value": "celsius" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Max Fan Speed (rpm)" + }, + "properties": [ + { + "id": "custom.align", + "value": "right" + }, + { + "id": "unit", + "value": "rotrpm" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Avg Fan Speed (rpm)" + }, + "properties": [ + { + "id": "custom.align", + "value": "right" + }, + { + "id": "unit", + "value": "rotrpm" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min Fan Speed (rpm)" + }, + "properties": [ + { + "id": "custom.align", + "value": "right" + }, + { + "id": "unit", + "value": "rotrpm" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Power" + }, + "properties": [ + { + "id": "custom.align", + "value": "right" + }, + { + "id": "unit", + "value": "watt" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min Ambient Temp (C)" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "green", + "value": 5 + }, + { + "color": "#eab839", + "value": 45 + }, + { + "color": "orange", + "value": 65 + }, + { + "color": "red", + "value": 75 + } + ] + } + }, + { + "id": "custom.align", + "value": "right" + }, + { + "id": "unit", + "value": "celsius" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Min Temp (C)" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "green", + "value": 5 + }, + { + "color": "yellow", + "value": 45 + }, + { + "color": "orange", + "value": 65 + }, + { + "color": "red", + "value": 75 + } + ] + } + }, + { + "id": "custom.align", + "value": "right" + }, + { + "id": "unit", + "value": "celsius" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 243, + "interval": "1m", + "maxDataPoints": 2, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "shelf_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity,error_type,error_text) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": false, + "expr": "shelf_disk_count{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "exemplar": false, + "expr": "shelf_new_status{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "exemplar": false, + "expr": "shelf_power{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "D" + }, + { + "exemplar": false, + "expr": "shelf_average_ambient_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "E" + }, + { + "exemplar": false, + "expr": "shelf_min_ambient_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "K" + }, + { + "exemplar": false, + "expr": "shelf_max_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + }, + { + "exemplar": false, + "expr": "shelf_average_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "G" + }, + { + "exemplar": false, + "expr": "shelf_min_temperature{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "L" + }, + { + "exemplar": false, + "expr": "shelf_max_fan_speed{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "H" + }, + { + "exemplar": false, + "expr": "shelf_average_fan_speed{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "I" + }, + { + "exemplar": false, + "expr": "shelf_min_fan_speed{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(shelf,cluster,datacenter) group_left(severity) health_shelf_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "J" + } + ], + "title": "Storage Shelf Issues", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "cluster", + "datacenter", + "model", + "module_type", + "op_status", + "serial_number", + "shelf", + "state", + "vendor_name", + "Value #A", + "Value #B", + "Value #C", + "error_text", + "error_type", + "severity" + ] + } + } + }, + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Value #A": true, + "shelf_id": true, + "state": true + }, + "indexByName": { + "Value #A": 13, + "Value #B": 10, + "Value #C": 14, + "cluster": 1, + "datacenter": 0, + "error_text": 3, + "error_type": 4, + "model": 6, + "module_type": 9, + "op_status": 11, + "serial_number": 7, + "severity": 5, + "shelf": 2, + "state": 12, + "vendor_name": 8 + }, + "renameByName": { + "Value #A": "", + "Value #B": "disk_count", + "Value #C": "state", + "Value #D": "Power", + "Value #E": "Avg Ambient Temp (C)", + "Value #F": "Max Temp (C)", + "Value #G": "Avg Temp (C)", + "Value #H": "Max Fan Speed (rpm)", + "Value #I": "Avg Fan Speed (rpm)", + "Value #J": "Min Fan Speed (rpm)", + "Value #K": "Min Ambient Temp (C)", + "Value #L": "Min Temp (C)", + "error_text": "Error", + "error_type": "Error Type", + "module_type": "", + "op_status": "", + "severity": "Severity", + "shelf_id": "" + } + } + } + ], + "type": "table" + } + ], + "title": "Shelves", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 266, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "description": "Volumes with abnormal activity", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(31, 176, 196)", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "1": { + "text": "online" + } + }, + "type": "value" + }, + { + "options": { + "from": 0, + "result": { + "text": "offline" + }, + "to": 0.99 + }, + "type": "range" + } + ] + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(83, 179, 59)", + "value": null + }, + { + "color": "semi-dark-red", + "value": 5 + } + ] + } + }, + { + "id": "custom.displayMode", + "value": "color-background-solid" + }, + { + "id": "custom.width" + }, + { + "id": "displayName", + "value": "status" + }, + { + "id": "color", + "value": { + "mode": "thresholds" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "space used" + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.displayMode", + "value": "gradient-gauge" + }, + { + "id": "max", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dedupe Space Saved" + }, + "properties": [ + { + "id": "custom.width" + }, + { + "id": "displayName", + "value": "Dedupe Space Saved" + }, + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Compression Space Saved" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Space Saved" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Logical Space Used" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Physical Space Used" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 264, + "interval": "1m", + "maxDataPoints": 2, + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "space used" + } + ] + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "volume_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity,anti_ransomware_attack_probability) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + }, + { + "exemplar": false, + "expr": "volume_new_status{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": false, + "expr": "volume_size_total{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "C" + }, + { + "exemplar": false, + "expr": "volume_size_used_percent{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "D" + }, + { + "exemplar": false, + "expr": "volume_sis_dedup_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "E" + }, + { + "exemplar": false, + "expr": "volume_sis_compress_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "F" + }, + { + "exemplar": false, + "expr": "volume_sis_dedup_saved+volume_sis_compress_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "G" + }, + { + "exemplar": false, + "expr": "volume_space_logical_used{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "H" + }, + { + "exemplar": false, + "expr": "volume_space_physical_used{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter) group_left(severity) health_volume_ransomware_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "I" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Volumes with Ransomware Issues (9.10+ Only)", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "aggr", + "node", + "svm", + "volume", + "Value #A", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #H", + "Value #I", + "antiRansomwareState", + "anti_ransomware_attack_probability" + ] + } + } + }, + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "Value #C": false, + "__name__": true, + "cluster": true, + "datacenter": true, + "instance": true, + "job": true, + "state": true + }, + "indexByName": { + "Value #A": 6, + "Value #C": 7, + "Value #D": 8, + "Value #E": 11, + "Value #F": 12, + "Value #H": 9, + "Value #I": 10, + "aggr": 3, + "antiRansomwareState": 4, + "anti_ransomware_attack_probability": 5, + "node": 2, + "svm": 0, + "volume": 1 + }, + "renameByName": { + "Value #C": "Size", + "Value #D": "", + "Value #E": "Dedupe Space Saved", + "Value #F": "Compression Space Saved", + "Value #G": "Total Space Saved", + "Value #H": "Logical Space Used", + "Value #I": "Physical Space Used", + "antiRansomwareState": "Ransomware State", + "anti_ransomware_attack_probability": "Ransomware Attack Probability" + } + } + } + ], + "type": "table" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(31, 176, 196)", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "1": { + "text": "online" + } + }, + "type": "value" + }, + { + "options": { + "from": 0, + "result": { + "text": "offline" + }, + "to": 0.99 + }, + "type": "range" + } + ] + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(83, 179, 59)", + "value": null + }, + { + "color": "semi-dark-red", + "value": 5 + } + ] + } + }, + { + "id": "custom.displayMode", + "value": "color-background-solid" + }, + { + "id": "custom.width" + }, + { + "id": "displayName", + "value": "status" + }, + { + "id": "color", + "value": { + "mode": "thresholds" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "space used" + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.displayMode", + "value": "gradient-gauge" + }, + { + "id": "max", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Dedupe Space Saved" + }, + "properties": [ + { + "id": "custom.width" + }, + { + "id": "displayName", + "value": "Dedupe Space Saved" + }, + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Compression Space Saved" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total Space Saved" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Logical Space Used" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Physical Space Used" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 271, + "interval": "1m", + "maxDataPoints": 2, + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "space used" + } + ] + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "volume_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity,movement_state) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + }, + { + "exemplar": false, + "expr": "volume_new_status{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }, + { + "exemplar": false, + "expr": "volume_size_total{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "C" + }, + { + "exemplar": false, + "expr": "volume_size_used_percent{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "D" + }, + { + "exemplar": false, + "expr": "volume_sis_dedup_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "E" + }, + { + "exemplar": false, + "expr": "volume_sis_compress_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "F" + }, + { + "exemplar": false, + "expr": "volume_sis_dedup_saved+volume_sis_compress_saved{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "G" + }, + { + "exemplar": false, + "expr": "volume_space_logical_used{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "H" + }, + { + "exemplar": false, + "expr": "volume_space_physical_used{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"} * on(volume,cluster,datacenter,svm) group_left(severity) health_volume_move_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "I" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Volumes Move Issues", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "aggr", + "cluster", + "datacenter", + "movement_state", + "node", + "severity", + "svm", + "volume", + "Value #A", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #H", + "Value #I" + ] + } + } + }, + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "Value #A": false, + "Value #C": false, + "__name__": true, + "cluster": false, + "datacenter": false, + "instance": true, + "job": true, + "state": true + }, + "indexByName": { + "Value #A": 8, + "Value #C": 9, + "Value #D": 10, + "Value #E": 13, + "Value #F": 14, + "Value #H": 11, + "Value #I": 12, + "aggr": 7, + "cluster": 1, + "datacenter": 0, + "movement_state": 5, + "node": 6, + "severity": 4, + "svm": 2, + "volume": 3 + }, + "renameByName": { + "Value #C": "Size", + "Value #D": "", + "Value #E": "Dedupe Space Saved", + "Value #F": "Compression Space Saved", + "Value #G": "Total Space Saved", + "Value #H": "Logical Space Used", + "Value #I": "Physical Space Used", + "aggr": "Aggregate", + "antiRansomwareState": "Ransomware State", + "anti_ransomware_attack_probability": "Ransomware Attack Probability", + "cluster": "Cluster", + "datacenter": "Datacenter", + "movement_state": "Movement State", + "node": "Node", + "severity": "Severity", + "svm": "SVM", + "volume": "Volume" + } + } + } + ], + "type": "table" + } + ], + "title": "Volume", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 274, + "panels": [ + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 276, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "health_license_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Non Compliant License", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "cluster", + "datacenter", + "name", + "scope", + "severity", + "state" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "cluster": "Cluster", + "datacenter": "Datacenter", + "name": "Name", + "scope": "Scope", + "severity": "Severity", + "state": "State" + } + } + } + ], + "type": "table" + } + ], + "title": "License", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 255, + "panels": [ + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Home?" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "false": { + "index": 0, + "text": "No" + } + }, + "type": "value" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 257, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "health_network_interface_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Network interfaces not at home port", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "instance": true, + "job": true + }, + "indexByName": { + "Time": 0, + "Value": 9, + "__name__": 1, + "cluster": 3, + "datacenter": 2, + "instance": 7, + "isHome": 5, + "job": 8, + "lif": 4, + "severity": 6 + }, + "renameByName": { + "cluster": "Cluster", + "datacenter": "Datacenter", + "instance": "", + "isHome": "Home?", + "lif": "Network Interface", + "severity": "Severity" + } + } + } + ], + "type": "table" + } + ], + "title": "Network Interface", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 259, + "panels": [ + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 261, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "health_network_ethernet_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Ethernet ports are down", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "instance": true, + "job": true + }, + "indexByName": { + "Time": 0, + "Value": 10, + "__name__": 1, + "cluster": 3, + "datacenter": 2, + "instance": 4, + "job": 5, + "node": 6, + "port": 7, + "severity": 9, + "state": 8 + }, + "renameByName": { + "cluster": "Cluster", + "datacenter": "Datacenter", + "node": "Node", + "port": "Port", + "severity": "Severity", + "state": "State" + } + } + } + ], + "type": "table" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 262, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "health_network_fc_port_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "FC ports are down", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "instance": true, + "job": true + }, + "indexByName": { + "Time": 0, + "Value": 10, + "__name__": 1, + "cluster": 2, + "datacenter": 3, + "instance": 4, + "job": 5, + "node": 6, + "port": 7, + "severity": 9, + "state": 8 + }, + "renameByName": { + "cluster": "Cluster", + "datacenter": "Datacenter", + "node": "Node", + "port": "Port", + "severity": "Severity", + "state": "State" + } + } + } + ], + "type": "table" + } + ], + "title": "Network Port", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 235, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "description": "$EMSDescription", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 237, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "last_over_time(ems_events{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",severity=\"emergency\"}[$__range]) == 1", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Emergency EMS", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": "/^(datacenter|cluster|message|node|severity)$/" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "cluster": 1, + "datacenter": 0, + "message": 2, + "node": 3, + "severity": 4 + }, + "renameByName": { + "cluster": "Cluster", + "datacenter": "Datacenter", + "message": "Message", + "node": "Node", + "severity": "Severity" + } + } + } + ], + "type": "table" + } + ], + "title": "Emergency EMS", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 247, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "description": "These are the health monitor events that have occurred within the selected time range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": true + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Datacenter" + }, + "properties": [ + { + "id": "unit", + "value": "string" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 249, + "options": { + "showHeader": true + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "last_over_time(health_support_alerts{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}[$__range]) == 1", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "System Alerts", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "index": true, + "instance": true, + "job": true, + "node": true, + "owner_node": true + }, + "indexByName": { + "Time": 0, + "Value": 6, + "__name__": 7, + "cluster": 2, + "correctiveAction": 12, + "datacenter": 1, + "instance": 4, + "job": 5, + "monitor": 9, + "name": 8, + "node": 13, + "reason": 11, + "resource": 10, + "severity": 3 + }, + "renameByName": { + "cluster": "Cluster", + "container_type": "Container Type", + "correctiveAction": "Corrective Action", + "datacenter": "Datacenter", + "disk": "Disk", + "model": "Model", + "monitor": "Monitor", + "name": "Name", + "reason": "Reason", + "resource": "Resource", + "serial_number": "Serial Number", + "severity": "Severity", + "shelf": "Shelf" + } + } + } + ], + "type": "table" + } + ], + "title": "System Health Alerts", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 30, + "style": "dark", + "tags": [ + "harvest", + "ontap", + "cdot" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "description": null, + "error": null, + "hide": 2, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(node_labels{system_type!=\"7mode\"},datacenter)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "", + "multi": true, + "name": "Datacenter", + "options": [], + "query": { + "query": "label_values(node_labels{system_type!=\"7mode\"},datacenter)", + "refId": "Prometheus-Datacenter-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(node_labels{system_type!=\"7mode\",datacenter=~\"$Datacenter\"},cluster)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "", + "multi": true, + "name": "Cluster", + "options": [], + "query": { + "query": "label_values(node_labels{system_type!=\"7mode\",datacenter=~\"$Datacenter\"},cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range.", + "value": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range." + }, + "description": null, + "error": null, + "hide": 2, + "label": null, + "name": "EMSDescription", + "options": [ + { + "selected": true, + "text": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range.", + "value": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range." + } + ], + "query": "The EMS collector gathers EMS events as defined in your ems.yml file. This panel displays events with emergency severity that occurred within the selected time range.", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "ONTAP: Health", + "uid": "", + "version": 1 +} \ No newline at end of file diff --git a/grafana/dashboards/cmode/nfs_clients.json b/grafana/dashboards/cmode/nfs_clients.json index 8ee7c7e36..744ab53aa 100644 --- a/grafana/dashboards/cmode/nfs_clients.json +++ b/grafana/dashboards/cmode/nfs_clients.json @@ -71,7 +71,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1665135798398, + "iteration": 1680864207753, "links": [ { "asDropdown": true, @@ -227,8 +227,8 @@ "id": 27, "options": { "legend": { - "displayMode": "hidden", - "placement": "right", + "displayMode": "table", + "placement": "bottom", "values": [ "value" ] @@ -577,5 +577,5 @@ "timezone": "", "title": "ONTAP: NFS Clients", "uid": "", - "version": 2 -} + "version": 3 +} \ No newline at end of file diff --git a/integration/test/data/counter_data.go b/integration/test/data/counter_data.go index f85b42733..d3764d391 100644 --- a/integration/test/data/counter_data.go +++ b/integration/test/data/counter_data.go @@ -42,6 +42,8 @@ func GetCounterMap() map[string][]string { "svm_nfs_read_latency_hist_bucket", "svm_nfs_write_latency_hist_bucket", "smb2_", + "health_", + "ems_events", } //if docker.IsDockerBasedPoller() || setup.IsMac { counterMap["NO_DATA_CONTAINS"] = append(counterMap["NO_DATA_CONTAINS"], "poller", "metadata_exporter_count")