Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 2060726: Use namespace when creating ServiceMonitor #70

Merged
merged 2 commits into from Jul 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
60 changes: 37 additions & 23 deletions cmd/manager/operator.go
Expand Up @@ -95,7 +95,6 @@ var (
}
serviceMonitorBearerTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token"
serviceMonitorTLSCAFile = "/etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt"
serviceMonitorTLSServerName = "metrics.openshift-compliance.svc"
alertName = "compliance"
)

Expand Down Expand Up @@ -321,7 +320,7 @@ func addMetrics(ctx context.Context, cfg *rest.Config, kClient *kubernetes.Clien
os.Exit(1)
}

if err := createServiceMonitor(ctx, cfg, mClient, operatorNs, metricsService); err != nil {
if err := handleServiceMonitor(ctx, cfg, mClient, operatorNs, metricsService); err != nil {
log.Error(err, "Error creating ServiceMonitor")
os.Exit(1)
}
Expand All @@ -332,10 +331,8 @@ func addMetrics(ctx context.Context, cfg *rest.Config, kClient *kubernetes.Clien
}
}

func ensureMetricsServiceAndSecret(ctx context.Context, kClient *kubernetes.Clientset, ns string) (*v1.Service, error) {
var mService *v1.Service
var err error
mService, err = kClient.CoreV1().Services(ns).Create(ctx, &v1.Service{
func operatorMetricService(ns string) *v1.Service {
return &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"name": "compliance-operator",
Expand Down Expand Up @@ -372,7 +369,13 @@ func ensureMetricsServiceAndSecret(ctx context.Context, kClient *kubernetes.Clie
},
Type: v1.ServiceTypeClusterIP,
},
}, metav1.CreateOptions{})
}
}

func ensureMetricsServiceAndSecret(ctx context.Context, kClient *kubernetes.Clientset, ns string) (*v1.Service, error) {
var mService *v1.Service
var err error
mService, err = kClient.CoreV1().Services(ns).Create(ctx, operatorMetricService(ns), metav1.CreateOptions{})
if err != nil && !kerr.IsAlreadyExists(err) {
return nil, err
}
Expand Down Expand Up @@ -540,20 +543,7 @@ func getDefaultRoles(platform PlatformType) []string {
return defaultRolesPerPlatform[PlatformGeneric]
}

// createServiceMonitor attempts to create a ServiceMonitor out of service, and updates it to include the controller
// metrics paths.
func createServiceMonitor(ctx context.Context, cfg *rest.Config, mClient *monclientv1.MonitoringV1Client,
namespace string, service *v1.Service) error {
ok, err := k8sutil.ResourceExists(discovery.NewDiscoveryClientForConfigOrDie(cfg),
"monitoring.coreos.com/v1", "ServiceMonitor")
if err != nil {
return err
}
if !ok {
log.Info("Install prometheus-operator in your cluster to create ServiceMonitor objects")
return nil
}

func generateOperatorServiceMonitor(service *v1.Service, namespace string) *monitoring.ServiceMonitor {
serviceMonitor := metrics.GenerateServiceMonitor(service)
for i := range serviceMonitor.Spec.Endpoints {
if serviceMonitor.Spec.Endpoints[i].Port == ctrlMetrics.ControllerMetricsServiceName {
Expand All @@ -562,12 +552,17 @@ func createServiceMonitor(ctx context.Context, cfg *rest.Config, mClient *moncli
serviceMonitor.Spec.Endpoints[i].BearerTokenFile = serviceMonitorBearerTokenFile
serviceMonitor.Spec.Endpoints[i].TLSConfig = &monitoring.TLSConfig{
CAFile: serviceMonitorTLSCAFile,
ServerName: serviceMonitorTLSServerName,
ServerName: "metrics." + namespace + ".svc",
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This the crux of the fix, right? The testing changes below aren't directly related to this change?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, the testing changes are unrelated. (Our CI testing is a direct read from the metrics service, while the ServiceMonitor is so cluster monitoring can read the metrics)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a unit testing layer available to ensure this format doesn't regress?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not atm. I can add it though

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rhmdnd I refactored some to fit a simple unit test..

}
}
}
return serviceMonitor
}

_, err = mClient.ServiceMonitors(namespace).Create(ctx, serviceMonitor, metav1.CreateOptions{})
// createOrUpdateServiceMonitor creates or updates the ServiceMonitor if it already exists.
func createOrUpdateServiceMonitor(ctx context.Context, mClient *monclientv1.MonitoringV1Client,
namespace string, serviceMonitor *monitoring.ServiceMonitor) error {
_, err := mClient.ServiceMonitors(namespace).Create(ctx, serviceMonitor, metav1.CreateOptions{})
if err != nil && !kerr.IsAlreadyExists(err) {
return err
}
Expand All @@ -587,6 +582,25 @@ func createServiceMonitor(ctx context.Context, cfg *rest.Config, mClient *moncli
return nil
}

// handleServiceMonitor attempts to create a ServiceMonitor out of service, and updates it to include the controller
// metrics paths.
func handleServiceMonitor(ctx context.Context, cfg *rest.Config, mClient *monclientv1.MonitoringV1Client,
namespace string, service *v1.Service) error {
ok, err := k8sutil.ResourceExists(discovery.NewDiscoveryClientForConfigOrDie(cfg),
"monitoring.coreos.com/v1", "ServiceMonitor")
if err != nil {
return err
}
if !ok {
log.Info("Install prometheus-operator in your cluster to create ServiceMonitor objects")
return nil
}

serviceMonitor := generateOperatorServiceMonitor(service, namespace)

return createOrUpdateServiceMonitor(ctx, mClient, namespace, serviceMonitor)
}

// serveCRMetrics gets the Operator/CustomResource GVKs and generates metrics based on those types.
// It serves those metrics on "http://metricsHost:operatorMetricsPort".
func serveCRMetrics(cfg *rest.Config, operatorNs string) error {
Expand Down
26 changes: 26 additions & 0 deletions cmd/manager/operator_test.go
@@ -0,0 +1,26 @@
package main

import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/openshift/compliance-operator/pkg/controller/metrics"
)

var _ = Describe("Operator Startup Function tests", func() {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding this

Context("Service Monitor Creation", func() {
When("Installing to non-controlled namespace", func() {
It("ServiceMonitor is generated with the proper TLSConfig ServerName", func() {
metricService := operatorMetricService("foobar")
sm := generateOperatorServiceMonitor(metricService, "foobar")
controllerMetricServiceFound := false
for _, ep := range sm.Spec.Endpoints {
if ep.Port == metrics.ControllerMetricsServiceName && ep.TLSConfig != nil {
Expect(ep.TLSConfig.ServerName).To(BeEquivalentTo("metrics.foobar.svc"))
controllerMetricServiceFound = true
}
}
Expect(controllerMetricServiceFound).To(BeTrue())
})
})
})
})
23 changes: 15 additions & 8 deletions tests/e2e/e2e_test.go
Expand Up @@ -586,16 +586,23 @@ func TestE2E(t *testing.T) {
fmt.Sprintf("compliance_operator_compliance_scan_status_total{name=\"%s\",phase=\"PENDING\",result=\"\"}", scanName): 1,
fmt.Sprintf("compliance_operator_compliance_scan_status_total{name=\"%s\",phase=\"RUNNING\",result=\"NOT-AVAILABLE\"}", scanName): 1,
}
metricsSet[aggrString] = 1
err = assertEachMetric(t, namespace, metricsSet)
if err != nil {
// Aggregating may be 1 or 2... try again
metricsSet[aggrString] = 2
secondTryErr := assertEachMetric(t, namespace, metricsSet)
if secondTryErr != nil {
return secondTryErr

var metErr error
// Aggregating may be variable, could be registered 1 to 3 times.
for i := 1; i < 4; i++ {
metricsSet[aggrString] = i
err = assertEachMetric(t, namespace, metricsSet)
if err == nil {
metErr = nil
break
}
metErr = err
}

if metErr != nil {
return metErr
}

return scanHasValidPVCReference(f, namespace, scanName)
},
},
Expand Down