Skip to content

Commit

Permalink
OCPBUGS-1803: Remove compliance_operator_compliance_scan_error_total …
Browse files Browse the repository at this point in the history
…metric

This metric contained the scan error, which can exceed lengths of 2k
(sometimes 11k), and causes resource issues with Prometheus and
integrating metrics into different storage backends.

This commit removes the metric since it goes against Prometheus best
practices:

  https://prometheus.io/docs/practices/naming/#labels
  • Loading branch information
rhmdnd committed Feb 17, 2023
1 parent 919a8a5 commit 069a68e
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 23 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ Versioning](https://semver.org/spec/v2.0.0.html).

### Removals

-
- The `compliance_scan_error_total` metric was designed to count individual
scan errors. As a result, one of the metric keys contained the scan error,
which is large. The length and uniqueness of the metric itself can cause
issues in Prometheus, as noted in [Metric and Label Naming best
practices](https://prometheus.io/docs/practices/naming/#labels). This metric
has been removed as a result. Please see the [bug
report](https://issues.redhat.com/browse/OCPBUGS-1803) for more details.

### Security

Expand Down
5 changes: 0 additions & 5 deletions doc/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -424,11 +424,6 @@ The compliance-operator exposes the following metrics to Prometheus when cluster
# TYPE compliance_operator_compliance_scan_status_total counter
compliance_operator_compliance_scan_status_total{name="scan-name",phase="AGGREGATING",result="NOT-AVAILABLE"} 1

# HELP compliance_operator_compliance_scan_error_total A counter for the
# total number of encounters of error
# TYPE compliance_operator_compliance_scan_error_total counter
compliance_operator_compliance_scan_error_total{name="scan-name",error="some_error"} 1

# HELP compliance_operator_compliance_state A gauge for the compliance
# state of a ComplianceSuite. Set to 0 when COMPLIANT, 1 when NON-COMPLIANT,
# 2 when INCONSISTENT, and 3 when ERROR
Expand Down
17 changes: 0 additions & 17 deletions pkg/controller/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ const (
metricNamespace = "compliance_operator"

metricNameComplianceScanStatus = "compliance_scan_status_total"
metricNameComplianceScanError = "compliance_scan_error_total"
metricNameComplianceRemediationStatus = "compliance_remediation_status_total"
metricNameComplianceStateGauge = "compliance_state"

Expand Down Expand Up @@ -53,22 +52,13 @@ type Metrics struct {
}

type ControllerMetrics struct {
metricComplianceScanError *prometheus.CounterVec
metricComplianceScanStatus *prometheus.CounterVec
metricComplianceRemediationStatus *prometheus.CounterVec
metricComplianceStateGauge *prometheus.GaugeVec
}

func DefaultControllerMetrics() *ControllerMetrics {
return &ControllerMetrics{
metricComplianceScanError: prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: metricNameComplianceScanError,
Namespace: metricNamespace,
Help: "A counter for the total number of encounters of error",
},
[]string{metricLabelScanName, metricLabelScanError},
),
metricComplianceScanStatus: prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: metricNameComplianceScanStatus,
Expand Down Expand Up @@ -121,7 +111,6 @@ func New() *Metrics {
// Register iterates over all available metrics and registers them.
func (m *Metrics) Register() error {
for name, collector := range map[string]prometheus.Collector{
metricNameComplianceScanError: m.metrics.metricComplianceScanError,
metricNameComplianceScanStatus: m.metrics.metricComplianceScanStatus,
metricNameComplianceRemediationStatus: m.metrics.metricComplianceRemediationStatus,
metricNameComplianceStateGauge: m.metrics.metricComplianceStateGauge,
Expand Down Expand Up @@ -162,12 +151,6 @@ func (m *Metrics) IncComplianceScanStatus(name string, status v1alpha1.Complianc
metricLabelScanPhase: string(status.Phase),
metricLabelScanResult: string(status.Result),
}).Inc()
if len(status.ErrorMessage) > 0 {
m.metrics.metricComplianceScanError.With(prometheus.Labels{
metricLabelScanName: name,
metricLabelScanError: status.ErrorMessage,
}).Inc()
}
}

// IncComplianceRemediationStatus increments the ComplianceRemediation status counter
Expand Down

0 comments on commit 069a68e

Please sign in to comment.