From c52b7ddb9a1f0090e9eb6a76c4081a56ceff7940 Mon Sep 17 00:00:00 2001 From: Anish Asthana Date: Wed, 24 Mar 2021 20:55:23 -0400 Subject: [PATCH] Update Metrics endpoints for ODH operator (#349) * Fix ODH and Argo monitoring Signed-off-by: Anish Asthana * Increase replica count to 2 for HA Signed-off-by: Anish Asthana * Update Prometheus name and corresponding test Signed-off-by: Anish Asthana * Restructure Service Monitors This separates the ODH operator and ODH application monitoring into two separate Service Monitors. Signed-off-by: Anish Asthana --- prometheus/operator/base/kustomization.yaml | 17 +++++++++++++- prometheus/operator/base/params.yaml | 4 ++++ .../prometheus-monitoring-role-binding.yaml | 13 +++++++++++ .../base/prometheus-monitoring-role.yaml | 23 +++++++++++++++++++ prometheus/operator/base/prometheus.yaml | 6 ++--- .../application-service-monitor.yaml} | 4 ++-- .../base/service-monitors/kustomization.yaml | 5 ++++ .../operator-service-monitor.yaml | 16 +++++++++++++ tests/basictests/prometheus.sh | 6 ++--- 9 files changed, 85 insertions(+), 9 deletions(-) create mode 100644 prometheus/operator/base/params.yaml create mode 100644 prometheus/operator/base/prometheus-monitoring-role-binding.yaml create mode 100644 prometheus/operator/base/prometheus-monitoring-role.yaml rename prometheus/operator/base/{servicemonitor.yaml => service-monitors/application-service-monitor.yaml} (83%) create mode 100644 prometheus/operator/base/service-monitors/kustomization.yaml create mode 100644 prometheus/operator/base/service-monitors/operator-service-monitor.yaml diff --git a/prometheus/operator/base/kustomization.yaml b/prometheus/operator/base/kustomization.yaml index 919f9d131..388a9590e 100644 --- a/prometheus/operator/base/kustomization.yaml +++ b/prometheus/operator/base/kustomization.yaml @@ -4,10 +4,25 @@ resources: - kafka-podmonitors.yaml - prometheus.yaml - route.yaml -- servicemonitor.yaml +- service-monitors +- prometheus-monitoring-role.yaml +- 
prometheus-monitoring-role-binding.yaml + namespace: opendatahub commonLabels: opendatahub.io/component: "true" component.opendatahub.io/name: prometheus generatorOptions: disableNameSuffixHash: true + +vars: + - name: namespace + objref: + kind: Prometheus + name: odh-monitoring + apiVersion: monitoring.coreos.com/v1 + fieldref: + fieldpath: metadata.namespace + +configurations: + - params.yaml diff --git a/prometheus/operator/base/params.yaml b/prometheus/operator/base/params.yaml new file mode 100644 index 000000000..b36da215c --- /dev/null +++ b/prometheus/operator/base/params.yaml @@ -0,0 +1,4 @@ +varReference: + - path: subjects/namespace + kind: ClusterRoleBinding + apiVersion: rbac.authorization.k8s.io/v1 diff --git a/prometheus/operator/base/prometheus-monitoring-role-binding.yaml b/prometheus/operator/base/prometheus-monitoring-role-binding.yaml new file mode 100644 index 000000000..efbbc7c45 --- /dev/null +++ b/prometheus/operator/base/prometheus-monitoring-role-binding.yaml @@ -0,0 +1,13 @@ +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: odh-prometheus-monitoring-rb +subjects: + - kind: ServiceAccount + name: prometheus-k8s + namespace: $(namespace) +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: odh-prometheus-monitoring diff --git a/prometheus/operator/base/prometheus-monitoring-role.yaml b/prometheus/operator/base/prometheus-monitoring-role.yaml new file mode 100644 index 000000000..cd082ea1c --- /dev/null +++ b/prometheus/operator/base/prometheus-monitoring-role.yaml @@ -0,0 +1,23 @@ +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: odh-prometheus-monitoring + namespace: opendatahub +rules: + - verbs: + - get + - list + - watch + apiGroups: + - '' + resources: + - services + - endpoints + - pods + - verbs: + - get + apiGroups: + - '' + resources: + - configmaps diff --git a/prometheus/operator/base/prometheus.yaml 
b/prometheus/operator/base/prometheus.yaml index b3b0d5ac4..64ebccb85 100644 --- a/prometheus/operator/base/prometheus.yaml +++ b/prometheus/operator/base/prometheus.yaml @@ -1,12 +1,12 @@ apiVersion: monitoring.coreos.com/v1 kind: Prometheus metadata: - name: prometheus + name: odh-monitoring labels: - prometheus: k8s + app: odh-monitoring namespace: prometheus spec: - replicas: 1 + replicas: 2 serviceAccountName: prometheus-k8s securityContext: {} serviceMonitorSelector: diff --git a/prometheus/operator/base/servicemonitor.yaml b/prometheus/operator/base/service-monitors/application-service-monitor.yaml similarity index 83% rename from prometheus/operator/base/servicemonitor.yaml rename to prometheus/operator/base/service-monitors/application-service-monitor.yaml index 855329e0e..37b776bf2 100644 --- a/prometheus/operator/base/servicemonitor.yaml +++ b/prometheus/operator/base/service-monitors/application-service-monitor.yaml @@ -3,10 +3,10 @@ kind: ServiceMonitor metadata: labels: team: opendatahub - name: odhservicemonitor + name: odh-application-servicemonitor spec: endpoints: - - port: web # odh-operator, Argo + - port: metrics # Argo - bearerTokenSecret: key: PROMETHEUS_API_TOKEN name: jupyterhub diff --git a/prometheus/operator/base/service-monitors/kustomization.yaml b/prometheus/operator/base/service-monitors/kustomization.yaml new file mode 100644 index 000000000..8f2690ab5 --- /dev/null +++ b/prometheus/operator/base/service-monitors/kustomization.yaml @@ -0,0 +1,5 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- application-service-monitor.yaml +- operator-service-monitor.yaml diff --git a/prometheus/operator/base/service-monitors/operator-service-monitor.yaml b/prometheus/operator/base/service-monitors/operator-service-monitor.yaml new file mode 100644 index 000000000..60130314f --- /dev/null +++ b/prometheus/operator/base/service-monitors/operator-service-monitor.yaml @@ -0,0 +1,16 @@ +apiVersion: 
monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + team: opendatahub + name: odh-operator-servicemonitor +spec: + endpoints: + - port: http-metrics # Open Data Hub Operator + - port: cr-metrics # Open Data Hub Operator + selector: + matchLabels: + name: opendatahub-operator + namespaceSelector: + matchNames: + - openshift-operators diff --git a/tests/basictests/prometheus.sh b/tests/basictests/prometheus.sh index cb17239e8..c1dde4fd7 100755 --- a/tests/basictests/prometheus.sh +++ b/tests/basictests/prometheus.sh @@ -20,9 +20,9 @@ function test_prometheus() { os::cmd::try_until_text "oc get pods -l k8s-app=prometheus-operator --field-selector='status.phase=Running' -o jsonpath='{$.items[*].metadata.name}'" "prometheus-operator" $odhdefaulttimeout $odhdefaultinterval runningbuspods=($(oc get pods -l k8s-app=prometheus-operator --field-selector="status.phase=Running" -o jsonpath="{$.items[*].metadata.name}")) os::cmd::expect_success_and_text "echo ${#runningbuspods[@]}" "1" - os::cmd::try_until_text "oc get pods -l app=prometheus --field-selector='status.phase=Running' -o jsonpath='{$.items[*].metadata.name}'" "prometheus-prometheus" $odhdefaulttimeout $odhdefaultinterval - runningbuspods=($(oc get pods -l app=prometheus --field-selector="status.phase=Running" -o jsonpath="{$.items[*].metadata.name}")) - os::cmd::expect_success_and_text "echo ${#runningbuspods[@]}" "1" + os::cmd::try_until_text "oc get pods -l prometheus=odh-monitoring --field-selector='status.phase=Running' -o jsonpath='{$.items[*].metadata.name}'" "prometheus-odh-monitoring" $odhdefaulttimeout $odhdefaultinterval + runningbuspods=($(oc get pods -l prometheus=odh-monitoring --field-selector="status.phase=Running" -o jsonpath="{$.items[*].metadata.name}")) + os::cmd::expect_success_and_text "echo ${#runningbuspods[@]}" "2" test_promportal }