From 75b482b0ab7e1ebf3458bedf671491a55751ee64 Mon Sep 17 00:00:00 2001 From: onbjerg Date: Wed, 3 Aug 2016 00:03:46 +0200 Subject: [PATCH 1/7] [kubernetes] add metrics for k8s deployments tags pods with their deployment (if any), and adds the following metrics: - kubernetes.pods.desired - kubernetes.pods.available - kubernetes.pods.unavailable adds the tag kube_deployment for pods and the aforementioned metrics --- checks.d/kubernetes.py | 32 ++++++++++++++++++++++++++++++-- utils/kubeutil.py | 27 +++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/checks.d/kubernetes.py b/checks.d/kubernetes.py index 1f3372472f..e1097afa2e 100644 --- a/checks.d/kubernetes.py +++ b/checks.d/kubernetes.py @@ -317,13 +317,41 @@ def _update_metrics(self, instance, pods_list): self._update_pods_metrics(instance, pods_list) + deployment_list = self.kubeutil.retrieve_deployments_list() + self._update_deployment_metrics(instance, deployment_list) + + def _update_deployment_metrics(self, instance, deployments): + deployments_map = dict() + for deployment in deployments['items']: + try: + pod_count = deployment['status']['replicas'] + pod_desired_count = deployment['spec']['replicas'] + pod_available_count = deployment['status']['availableReplicas'] + deployments_map[deployment['metadata']['name']] = { + 'pods': pod_count, + 'desired': pod_desired_count, + 'available': pod_available_count, + 'unavailable': pod_count - pod_available_count + } + except KeyError: + continue + + tags = instance.get('tags', []) + for dep, stats in deployments_map.iteritems(): + _tags = tags[:] + _tags.append('kube_deployment:{0}'.format(dep)) + self.publish_gauge(self, NAMESPACE + '.pods.running', stats.pods, _tags) + self.publish_gauge(self, NAMESPACE + '.pods.desired', stats.desired, _tags) + self.publish_gauge(self, NAMESPACE + '.pods.available', stats.available, _tags) + self.publish_gauge(self, NAMESPACE + '.pods.unavailable', stats.unavailable, _tags) + def _update_pods_metrics(self, instance, pods): supported_kinds = [ "DaemonSet", - "Deployment", "Job", "ReplicationController", - "ReplicaSet", + "Deployment", + "ReplicaSet" ] controllers_map = defaultdict(int) diff --git a/utils/kubeutil.py b/utils/kubeutil.py index 2dc9dc1c70..5854ace698 100644 --- a/utils/kubeutil.py +++ b/utils/kubeutil.py @@ -7,6 +7,7 @@ import logging import os from urlparse import urljoin +import string # project from util import check_yaml @@ -27,6 +28,7 @@ class KubeUtil: DEFAULT_METHOD = 'http' METRICS_PATH = '/api/v1.3/subcontainers/' PODS_LIST_PATH = '/pods/' + DEPLOYMENTS_LIST_PATH = '/apis/extensions/v1beta1/deployments' DEFAULT_CADVISOR_PORT = 4194 DEFAULT_KUBELET_PORT = 10255 DEFAULT_MASTER_PORT = 8080 @@ -68,6 +70,7 @@ def __init__(self, instance=None): self.metrics_url = urljoin(self.cadvisor_url, KubeUtil.METRICS_PATH) self.pods_list_url = urljoin(self.kubelet_api_url, KubeUtil.PODS_LIST_PATH) self.kube_health_url = urljoin(self.kubelet_api_url, 'healthz') + self.deployments_list_url = urljoin(self.kubelet_api_url, KubeUtil.DEPLOYMENTS_LIST_PATH) # keep track of the latest k8s event we collected and posted # default value is 0 but TTL for k8s events is one hour anyways @@ -90,9 +93,15 @@ def extract_kube_labels(self, pods_list, excluded_keys=None): name = metadata.get("name") namespace = metadata.get("namespace") labels = metadata.get("labels") + if name and labels and namespace: key = "%s/%s" % (namespace, name) + # Add deployment name (if any) + deployment_name = self.get_deployment_name(pod) + if 
deployment_name is not None: + kube_labels[key].append(u"kube_deployment:%s" % (deployment_name)) + for k, v in labels.iteritems(): if k in excluded_keys: continue @@ -116,6 +125,24 @@ def extract_meta(self, pods_list, field_name): uids.append(value) return uids + def get_deployment_name(self, pod): + # HACK(onbjerg): + # In k8s there is no direct name to link a ReplicaSet + # (and thus a pod) to a Deployment. Naming conventions + # exist for ReplicaSet created by deployments, though. + # + # In order to retrieve the deployment that created a pod + # we must take the pods ReplicaSet name and remove the + # pods template hash. + serialized_reference = json.loads(pod['metadata']['annotations']['kubernetes.io/created-by']) + if serialized_reference['reference']['kind'] == 'ReplicaSet': + template_hash = pod['metadata']['labels']['pod-template-hash'] + return string.replace(serialized_reference['reference']['name'], '-' + template_hash, '') + return None + + def retrieve_deployments_list(self): + return retrieve_json(self.deployments_list_url) + def retrieve_pods_list(self): """ Retrieve the list of pods for this cluster querying the kubelet API. From ff17804b8a7e7df3461aa515ed9fe893dab0897b Mon Sep 17 00:00:00 2001 From: onbjerg Date: Wed, 3 Aug 2016 00:48:26 +0200 Subject: [PATCH 2/7] [kubernetes] add deployments fixture --- .../fixtures/kubernetes/deployments_list.json | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 tests/checks/fixtures/kubernetes/deployments_list.json diff --git a/tests/checks/fixtures/kubernetes/deployments_list.json b/tests/checks/fixtures/kubernetes/deployments_list.json new file mode 100644 index 0000000000..c836e93ccf --- /dev/null +++ b/tests/checks/fixtures/kubernetes/deployments_list.json @@ -0,0 +1,136 @@ +{ + "kind": "DeploymentList", + "apiVersion": "extensions/v1beta1", + "metadata": { + "selfLink": "/apis/extensions/v1beta1/deployments", + "resourceVersion": "1323769" + }, + "items": [ + { + "metadata": { + "name": "heapster-v1.1.0", + "namespace": "kube-system", + "selfLink": "/apis/extensions/v1beta1/namespaces/kube-system/deployments/heapster-v1.1.0", + "uid": "4707e5fd-4e98-11e6-8ed7-42010af0019d", + "resourceVersion": "1146651", + "generation": 4, + "creationTimestamp": "2016-07-20T16:37:41Z", + "labels": { + "k8s-app": "heapster", + "kubernetes.io/cluster-service": "true", + "version": "v1.1.0" + }, + "annotations": { + "deployment.kubernetes.io/revision": "2" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "k8s-app": "heapster", + "version": "v1.1.0" + } + }, + "template": { + "metadata": { + "creationTimestamp": null, + "labels": { + "k8s-app": "heapster", + "version": "v1.1.0" + } + }, + "spec": { + "containers": [ + { + "name": "heapster", + "image": "eu.gcr.io/google_containers/heapster:v1.1.0", + "command": [ + "/heapster", + "--source=kubernetes.summary_api:''" + ], + "resources": { + "limits": { + "cpu": "88m", + "memory": "204Mi" + }, + "requests": { + "cpu": "88m", + "memory": "204Mi" + } + }, + "terminationMessagePath": "/dev/termination-log", + "imagePullPolicy": "IfNotPresent" + }, + { + "name": "heapster-nanny", + "image": "eu.gcr.io/google_containers/addon-resizer:1.3", + "command": [ + "/pod_nanny", + "--cpu=80m", + "--extra-cpu=0.5m", + "--memory=140Mi", + "--extra-memory=4Mi", + "--threshold=5", + "--deployment=heapster-v1.1.0", + "--container=heapster", + "--poll-period=300000", + "--estimator=exponential" + ], + "env": [ + { + "name": "MY_POD_NAME", + "valueFrom": { + 
"fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.name" + } + } + }, + { + "name": "MY_POD_NAMESPACE", + "valueFrom": { + "fieldRef": { + "apiVersion": "v1", + "fieldPath": "metadata.namespace" + } + } + } + ], + "resources": { + "limits": { + "cpu": "50m", + "memory": "92960Ki" + }, + "requests": { + "cpu": "50m", + "memory": "92960Ki" + } + }, + "terminationMessagePath": "/dev/termination-log", + "imagePullPolicy": "IfNotPresent" + } + ], + "restartPolicy": "Always", + "terminationGracePeriodSeconds": 30, + "dnsPolicy": "ClusterFirst", + "securityContext": {} + } + }, + "strategy": { + "type": "RollingUpdate", + "rollingUpdate": { + "maxUnavailable": 1, + "maxSurge": 1 + } + } + }, + "status": { + "observedGeneration": 4, + "replicas": 1, + "updatedReplicas": 1, + "availableReplicas": 1 + } + } + ] +} From 658a84232500e52fe73ddfe6e7b096460cce9674 Mon Sep 17 00:00:00 2001 From: onbjerg Date: Wed, 3 Aug 2016 00:48:41 +0200 Subject: [PATCH 3/7] [kubernetes] mock deployment list --- tests/checks/mock/test_kubernetes.py | 51 ++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/tests/checks/mock/test_kubernetes.py b/tests/checks/mock/test_kubernetes.py index f1df7cef6a..7ac4d7de0c 100644 --- a/tests/checks/mock/test_kubernetes.py +++ b/tests/checks/mock/test_kubernetes.py @@ -60,9 +60,12 @@ def test_fail_1_1(self, *args): "instances": [{"host": "foo"}] } - # Can't use run_check_twice due to specific metrics - self.run_check(config, force_reload=True) - self.assertServiceCheck("kubernetes.kubelet.check", status=AgentCheck.CRITICAL, tags=None, count=1) + with mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list', side_effect=lambda: json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))): + with mock.patch('utils.kubeutil.KubeUtil.retrieve_deployments_list', side_effect=lambda: json.loads(Fixtures.read_file("deployments_list.json", string_escape=False))): + with mock.patch('utils.dockerutil.DockerUtil.get_hostname', side_effect=lambda: 'foo'): + # Can't use run_check_twice due to specific metrics + self.run_check(config, mocks=mocks, force_reload=True) + self.assertServiceCheck("kubernetes.kubelet.check", status=AgentCheck.CRITICAL, tags=None, count=1) @mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth') @mock.patch('utils.kubeutil.KubeUtil.retrieve_metrics', @@ -82,8 +85,12 @@ def test_metrics_1_1(self, *args): } ] } - # Can't use run_check_twice due to specific metrics - self.run_check_twice(config, mocks=mocks, force_reload=True) + # parts of the json returned by the kubelet api is escaped, keep it untouched + with mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list', side_effect=lambda: json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))): + with mock.patch('utils.kubeutil.KubeUtil.retrieve_deployments_list', side_effect=lambda: json.loads(Fixtures.read_file("deployments_list.json", string_escape=False))): + with mock.patch('utils.dockerutil.DockerUtil.get_hostname', side_effect=lambda: 'foo'): + # Can't use run_check_twice due to specific metrics + self.run_check_twice(config, mocks=mocks, force_reload=True) expected_tags = [ (['container_name:/kubelet', 'pod_name:no_pod'], [MEM, CPU, NET, DISK]), @@ -140,8 +147,13 @@ def test_historate_1_1(self, *args): } ] } - # Can't use run_check_twice due to specific metrics - self.run_check_twice(config, mocks=mocks, force_reload=True) + + # parts of the json returned by the kubelet api is escaped, keep it untouched + with 
mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list', side_effect=lambda: json.loads(Fixtures.read_file("pods_list_1.1.json", string_escape=False))): + with mock.patch('utils.kubeutil.KubeUtil.retrieve_deployments_list', side_effect=lambda: json.loads(Fixtures.read_file("deployments_list.json", string_escape=False))): + with mock.patch('utils.dockerutil.DockerUtil.get_hostname', side_effect=lambda: 'foo'): + # Can't use run_check_twice due to specific metrics + self.run_check_twice(config, mocks=mocks, force_reload=True) metric_suffix = ["count", "avg", "median", "max", "95percentile"] @@ -181,9 +193,12 @@ def test_fail_1_2(self, *args): "instances": [{"host": "foo"}] } - # Can't use run_check_twice due to specific metrics - self.run_check(config, force_reload=True) - self.assertServiceCheck("kubernetes.kubelet.check", status=AgentCheck.CRITICAL) + with mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list', side_effect=lambda: json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))): + with mock.patch('utils.kubeutil.KubeUtil.retrieve_deployments_list', side_effect=lambda: json.loads(Fixtures.read_file("deployments_list.json", string_escape=False))): + with mock.patch('utils.dockerutil.DockerUtil.get_hostname', side_effect=lambda: 'foo'): + # Can't use run_check_twice due to specific metrics + self.run_check(config, mocks=mocks, force_reload=True) + self.assertServiceCheck("kubernetes.kubelet.check", status=AgentCheck.CRITICAL) @mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth') @mock.patch('utils.kubeutil.KubeUtil.retrieve_metrics', @@ -202,8 +217,12 @@ def test_metrics_1_2(self, *args): } ] } - # Can't use run_check_twice due to specific metrics - self.run_check_twice(config, mocks=mocks, force_reload=True) + # parts of the json returned by the kubelet api is escaped, keep it untouched + with mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list', side_effect=lambda: json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))): + with mock.patch('utils.kubeutil.KubeUtil.retrieve_deployments_list', side_effect=lambda: json.loads(Fixtures.read_file("deployments_list.json", string_escape=False))): + with mock.patch('utils.dockerutil.DockerUtil.get_hostname', side_effect=lambda: 'foo'): + # Can't use run_check_twice due to specific metrics + self.run_check_twice(config, mocks=mocks, force_reload=True) expected_tags = [ (['container_name:/kubelet', 'pod_name:no_pod'], [MEM, CPU, NET, DISK]), @@ -246,8 +265,12 @@ def test_historate_1_2(self, *args): ] } - # Can't use run_check_twice due to specific metrics - self.run_check_twice(config, mocks=mocks, force_reload=True) + # parts of the json returned by the kubelet api is escaped, keep it untouched + with mock.patch('utils.kubeutil.KubeUtil.retrieve_pods_list', side_effect=lambda: json.loads(Fixtures.read_file("pods_list_1.2.json", string_escape=False))): + with mock.patch('utils.kubeutil.KubeUtil.retrieve_deployments_list', side_effect=lambda: json.loads(Fixtures.read_file("deployments_list.json", string_escape=False))): + with mock.patch('utils.dockerutil.DockerUtil.get_hostname', side_effect=lambda: 'foo'): + # Can't use run_check_twice due to specific metrics + self.run_check_twice(config, mocks=mocks, force_reload=True) metric_suffix = ["count", "avg", "median", "max", "95percentile"] From e1b53f709169fe1fd60697131d029fa971522d9b Mon Sep 17 00:00:00 2001 From: onbjerg Date: Wed, 3 Aug 2016 00:54:29 +0200 Subject: [PATCH 4/7] [kubernetes] test deployment metrics --- 
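Note: the expected values for the new per-deployment gauges come straight from the
deployments_list.json fixture added in PATCH 2. A minimal standalone sketch of that
derivation, assuming it is run from the repo root (this script is illustrative only,
not part of the test suite):

    import json

    # Load the PATCH 2 fixture, a DeploymentList from the extensions/v1beta1 API.
    with open('tests/checks/fixtures/kubernetes/deployments_list.json') as f:
        deployments = json.load(f)

    for d in deployments['items']:
        running = d['status']['replicas']             # 1 in the fixture
        desired = d['spec']['replicas']               # 1
        available = d['status']['availableReplicas']  # 1
        # 'unavailable' is derived, matching _update_deployment_metrics.
        print d['metadata']['name'], running, desired, available, running - available
        # -> heapster-v1.1.0 1 1 1 0
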
tests/checks/mock/test_kubernetes.py | 51 +++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/tests/checks/mock/test_kubernetes.py b/tests/checks/mock/test_kubernetes.py index 7ac4d7de0c..1328cf79d5 100644 --- a/tests/checks/mock/test_kubernetes.py +++ b/tests/checks/mock/test_kubernetes.py @@ -22,9 +22,12 @@ NET_ERRORS = "net_errors" DISK = "disk" DISK_USAGE = "disk_usage" -PODS = "pods" +PODS = "running_pods" LIM = "limits" REQ = "requests" +DESIRED_PODS = "desired_pods" +AVAILABLE_PODS = "available_pods" +UNAVAILABLE_PODS = "unavailable_pods" METRICS = [ ('kubernetes.memory.usage', MEM), @@ -42,6 +45,9 @@ ('kubernetes.cpu.requests', REQ), ('kubernetes.memory.limits', LIM), ('kubernetes.memory.requests', REQ), + ('kubernetes.pods.desired', DESIRED_PODS), + ('kubernetes.pods.available', AVAILABLE_PODS), + ('kubernetes.pods.unavailable', UNAVAILABLE_PODS), ] @@ -94,17 +100,26 @@ def test_metrics_1_1(self, *args): expected_tags = [ (['container_name:/kubelet', 'pod_name:no_pod'], [MEM, CPU, NET, DISK]), + (['kube_replication_controller:propjoe', 'kube_namespace:default', 'container_name:k8s_POD.e4cc795_propjoe-dhdzk_default_ba151259-36e0-11e5-84ce-42010af01c62_ef0ed5f9', 'pod_name:default/propjoe-dhdzk'], [MEM, CPU, FS, NET, NET_ERRORS]), + (['container_name:/kube-proxy', 'pod_name:no_pod'], [MEM, CPU, NET]), + (['kube_replication_controller:kube-dns-v8', 'kube_namespace:kube-system', 'container_name:k8s_POD.2688308a_kube-dns-v8-smhcb_kube-system_b80ffab3-3619-11e5-84ce-42010af01c62_295f14ff', 'pod_name:kube-system/kube-dns-v8-smhcb'], [MEM, CPU, FS, NET, NET_ERRORS]), + (['container_name:/docker-daemon', 'pod_name:no_pod'], [MEM, CPU, DISK, NET]), + (['kube_replication_controller:kube-dns-v8', 'kube_namespace:kube-system', 'container_name:k8s_etcd.2e44beff_kube-dns-v8-smhcb_kube-system_b80ffab3-3619-11e5-84ce-42010af01c62_e3e504ad', 'pod_name:kube-system/kube-dns-v8-smhcb'], [MEM, CPU, FS, NET, NET_ERRORS, DISK]), (['kube_replication_controller:fluentd-cloud-logging-kubernetes-minion', 'kube_namespace:kube-system', 'container_name:k8s_POD.e4cc795_fluentd-cloud-logging-kubernetes-minion-mu4w_kube-system_d0feac1ad02da9e97c4bf67970ece7a1_49dd977d', 'pod_name:kube-system/fluentd-cloud-logging-kubernetes-minion-mu4w'], [MEM, CPU, FS, NET, NET_ERRORS, DISK]), (['kube_replication_controller:kube-dns-v8', 'kube_namespace:kube-system', 'container_name:k8s_skydns.1e752dc0_kube-dns-v8-smhcb_kube-system_b80ffab3-3619-11e5-84ce-42010af01c62_7c1345a1', 'pod_name:kube-system/kube-dns-v8-smhcb'], [MEM, CPU, FS, NET, NET_ERRORS]), + (['container_name:/', 'pod_name:no_pod'], [MEM, CPU, FS, NET, NET_ERRORS, DISK]), (['container_name:/system/docker', 'pod_name:no_pod'], [MEM, CPU, DISK, NET]), + (['kube_replication_controller:propjoe', 'kube_namespace:default', 'container_name:k8s_propjoe.21f63023_propjoe-dhdzk_default_ba151259-36e0-11e5-84ce-42010af01c62_19879457', 'pod_name:default/propjoe-dhdzk'], [MEM, CPU, FS, NET, NET_ERRORS]), + (['container_name:/system', 'pod_name:no_pod'], [MEM, CPU, NET, DISK]), + (['kube_replication_controller:kube-ui-v1', 'kube_namespace:kube-system', 'container_name:k8s_POD.3b46e8b9_kube-ui-v1-sv2sq_kube-system_b7e8f250-3619-11e5-84ce-42010af01c62_209ed1dc', 'pod_name:kube-system/kube-ui-v1-sv2sq'], [MEM, CPU, FS, NET, NET_ERRORS]), (['kube_replication_controller:kube-dns-v8', 'kube_namespace:kube-system', 'container_name:k8s_kube2sky.1afa6a47_kube-dns-v8-smhcb_kube-system_b80ffab3-3619-11e5-84ce-42010af01c62_624bc34c', 
'pod_name:kube-system/kube-dns-v8-smhcb'], [MEM, CPU, FS, NET, NET_ERRORS]), (['kube_replication_controller:propjoe', 'kube_namespace:default', 'container_name:k8s_POD.e4cc795_propjoe-lkc3l_default_3a9b1759-4055-11e5-84ce-42010af01c62_45d1185b', 'pod_name:default/propjoe-lkc3l'], [MEM, CPU, FS, NET, NET_ERRORS]), @@ -114,12 +129,20 @@ def test_metrics_1_1(self, *args): (['kube_replication_controller:propjoe','kube_namespace:default', 'container_name:k8s_propjoe.21f63023_propjoe-lkc3l_default_3a9b1759-4055-11e5-84ce-42010af01c62_9fe8b7b0', 'pod_name:default/propjoe-lkc3l'], [MEM, CPU, FS, NET, NET_ERRORS]), (['kube_replication_controller:kube-dns-v8','kube_namespace:kube-system', 'container_name:k8s_healthz.4469a25d_kube-dns-v8-smhcb_kube-system_b80ffab3-3619-11e5-84ce-42010af01c62_241c34d1', 'pod_name:kube-system/kube-dns-v8-smhcb'], [MEM, CPU, FS, NET, NET_ERRORS, DISK]), (['kube_replication_controller:fluentd-cloud-logging-kubernetes-minion','kube_namespace:kube-system', 'container_name:k8s_fluentd-cloud-logging.7721935b_fluentd-cloud-logging-kubernetes-minion-mu4w_kube-system_d0feac1ad02da9e97c4bf67970ece7a1_2c3c0879', 'pod_name:kube-system/fluentd-cloud-logging-kubernetes-minion-mu4w'], [MEM, CPU, FS, NET, NET_ERRORS, DISK]), + (['container_name:dd-agent', 'pod_name:no_pod'], [MEM, CPU, FS, NET, NET_ERRORS, DISK]), + (['kube_replication_controller:l7-lb-controller'], [PODS]), (['kube_replication_controller:redis-slave'], [PODS]), (['kube_replication_controller:frontend'], [PODS]), (['kube_replication_controller:heapster-v11'], [PODS]), - ([], [LIM, REQ]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor + + ([], [LIM, REQ]), # container from kubernetes api doesn't have a corresponding entry in Cadvisor + + (['kube_deployment:heapster-v1.1.0'], [PODS]), + (['kube_deployment:heapster-v1.1.0'], [DESIRED_PODS]), + (['kube_deployment:heapster-v1.1.0'], [AVAILABLE_PODS]), + (['kube_deployment:heapster-v1.1.0'], [UNAVAILABLE_PODS]), ] for m, _type in METRICS: for tags, types in expected_tags: @@ -171,7 +194,13 @@ def test_historate_1_1(self, *args): (['kube_replication_controller:redis-slave'], [PODS]), (['kube_replication_controller:frontend'], [PODS]), (['kube_replication_controller:heapster-v11'], [PODS]), - ([], [LIM, REQ]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor + + ([], [LIM, REQ]), # container from kubernetes api doesn't have a corresponding entry in Cadvisor + + (['kube_deployment:heapster-v1.1.0'], [PODS]), + (['kube_deployment:heapster-v1.1.0'], [DESIRED_PODS]), + (['kube_deployment:heapster-v1.1.0'], [AVAILABLE_PODS]), + (['kube_deployment:heapster-v1.1.0'], [UNAVAILABLE_PODS]), ] for m, _type in METRICS: @@ -235,7 +264,13 @@ def test_metrics_1_2(self, *args): 'pod_name:default/dd-agent-1rxlh', 'kube_namespace:default', 'kube_app:dd-agent', 'kube_foo:bar', 'kube_bar:baz', 'kube_replication_controller:dd-agent'], [LIM, REQ, MEM, CPU, NET, DISK, DISK_USAGE]), (['kube_replication_controller:dd-agent'], [PODS]), - ([], [LIM, REQ]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor + + ([], [LIM, REQ]), # container from kubernetes api doesn't have a corresponding entry in Cadvisor + + (['kube_deployment:heapster-v1.1.0'], [PODS]), + (['kube_deployment:heapster-v1.1.0'], [DESIRED_PODS]), + (['kube_deployment:heapster-v1.1.0'], [AVAILABLE_PODS]), + (['kube_deployment:heapster-v1.1.0'], [UNAVAILABLE_PODS]), ] for m, _type in METRICS: @@ -279,8 +314,14 @@ def test_historate_1_2(self, *args): 
'kube_bar:baz', 'kube_replication_controller:dd-agent'], [MEM, CPU, NET, DISK, NET_ERRORS, DISK_USAGE, LIM, REQ]), (['pod_name:no_pod'], [MEM, CPU, FS, NET, NET_ERRORS, DISK]), + (['kube_replication_controller:dd-agent'], [PODS]), - ([], [LIM, REQ]) # container from kubernetes api doesn't have a corresponding entry in Cadvisor + ([], [LIM, REQ]), # container from kubernetes api doesn't have a corresponding entry in Cadvisor + + (['kube_deployment:heapster-v1.1.0'], [PODS]), + (['kube_deployment:heapster-v1.1.0'], [DESIRED_PODS]), + (['kube_deployment:heapster-v1.1.0'], [AVAILABLE_PODS]), + (['kube_deployment:heapster-v1.1.0'], [UNAVAILABLE_PODS]), ] for m, _type in METRICS: From 525948a58c3bed2266db6ff1cf0e4e5940c1a39f Mon Sep 17 00:00:00 2001 From: onbjerg Date: Wed, 3 Aug 2016 00:56:09 +0200 Subject: [PATCH 5/7] [kubernetes] import json in kubeutil --- utils/kubeutil.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/kubeutil.py b/utils/kubeutil.py index 5854ace698..371d06c805 100644 --- a/utils/kubeutil.py +++ b/utils/kubeutil.py @@ -8,6 +8,7 @@ import os from urlparse import urljoin import string +import json # project from util import check_yaml From 9e4440dabc0ee71f680ee7c20f6b5c53dcd54a19 Mon Sep 17 00:00:00 2001 From: onbjerg Date: Wed, 3 Aug 2016 01:39:44 +0200 Subject: [PATCH 6/7] [kubernetes] fix tests --- checks.d/kubernetes.py | 8 ++++---- utils/kubeutil.py | 11 +++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/checks.d/kubernetes.py b/checks.d/kubernetes.py index e1097afa2e..9b903dcaa2 100644 --- a/checks.d/kubernetes.py +++ b/checks.d/kubernetes.py @@ -340,10 +340,10 @@ def _update_deployment_metrics(self, instance, deployments): for dep, stats in deployments_map.iteritems(): _tags = tags[:] _tags.append('kube_deployment:{0}'.format(dep)) - self.publish_gauge(self, NAMESPACE + '.pods.running', stats.pods, _tags) - self.publish_gauge(self, NAMESPACE + '.pods.desired', stats.desired, _tags) - self.publish_gauge(self, NAMESPACE + '.pods.available', stats.available, _tags) - self.publish_gauge(self, NAMESPACE + '.pods.unavailable', stats.unavailable, _tags) + self.publish_gauge(self, NAMESPACE + '.pods.running', stats['pods'], _tags) + self.publish_gauge(self, NAMESPACE + '.pods.desired', stats['desired'], _tags) + self.publish_gauge(self, NAMESPACE + '.pods.available', stats['available'], _tags) + self.publish_gauge(self, NAMESPACE + '.pods.unavailable', stats['unavailable'], _tags) def _update_pods_metrics(self, instance, pods): supported_kinds = [ diff --git a/utils/kubeutil.py b/utils/kubeutil.py index 371d06c805..7194b4c578 100644 --- a/utils/kubeutil.py +++ b/utils/kubeutil.py @@ -135,10 +135,13 @@ def get_deployment_name(self, pod): # In order to retrieve the deployment that created a pod # we must take the pods ReplicaSet name and remove the # pods template hash. 
- serialized_reference = json.loads(pod['metadata']['annotations']['kubernetes.io/created-by']) - if serialized_reference['reference']['kind'] == 'ReplicaSet': - template_hash = pod['metadata']['labels']['pod-template-hash'] - return string.replace(serialized_reference['reference']['name'], '-' + template_hash, '') + try: + serialized_reference = json.loads(pod['metadata']['annotations']['kubernetes.io/created-by']) + if serialized_reference['reference']['kind'] == 'ReplicaSet': + template_hash = pod['metadata']['labels']['pod-template-hash'] + return string.replace(serialized_reference['reference']['name'], '-' + template_hash, '') + except KeyError: + return None return None def retrieve_deployments_list(self): From c013153177a70ee693e2d62e2e5f78da283c411e Mon Sep 17 00:00:00 2001 From: Oliver Date: Fri, 30 Sep 2016 22:00:32 +0200 Subject: [PATCH 7/7] [kubernetes] fix tests --- tests/checks/mock/test_kubernetes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/checks/mock/test_kubernetes.py b/tests/checks/mock/test_kubernetes.py index 1328cf79d5..a8337dc21a 100644 --- a/tests/checks/mock/test_kubernetes.py +++ b/tests/checks/mock/test_kubernetes.py @@ -70,7 +70,7 @@ def test_fail_1_1(self, *args): with mock.patch('utils.kubeutil.KubeUtil.retrieve_deployments_list', side_effect=lambda: json.loads(Fixtures.read_file("deployments_list.json", string_escape=False))): with mock.patch('utils.dockerutil.DockerUtil.get_hostname', side_effect=lambda: 'foo'): # Can't use run_check_twice due to specific metrics - self.run_check(config, mocks=mocks, force_reload=True) + self.run_check(config, force_reload=True) self.assertServiceCheck("kubernetes.kubelet.check", status=AgentCheck.CRITICAL, tags=None, count=1) @mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth') @@ -226,7 +226,7 @@ def test_fail_1_2(self, *args): with mock.patch('utils.kubeutil.KubeUtil.retrieve_deployments_list', side_effect=lambda: json.loads(Fixtures.read_file("deployments_list.json", string_escape=False))): with mock.patch('utils.dockerutil.DockerUtil.get_hostname', side_effect=lambda: 'foo'): # Can't use run_check_twice due to specific metrics - self.run_check(config, mocks=mocks, force_reload=True) + self.run_check(config, force_reload=True) self.assertServiceCheck("kubernetes.kubelet.check", status=AgentCheck.CRITICAL) @mock.patch('utils.kubeutil.KubeUtil.retrieve_json_auth')
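
Note: the created-by heuristic behind get_deployment_name (introduced in PATCH 1 and
hardened with a try/except in PATCH 6) can be sanity-checked in isolation. Below is a
minimal standalone sketch; the pod dict is a hand-made example rather than one of this
PR's fixtures, and str.replace is used in place of string.replace (they behave
identically on these inputs):

    import json

    def deployment_name_from_pod(pod):
        # Pods created by a Deployment go through a ReplicaSet named
        # '<deployment>-<pod-template-hash>' (a k8s naming convention, not a
        # guaranteed API), so stripping the hash suffix from the ReplicaSet
        # name recovers the Deployment name.
        try:
            created_by = json.loads(
                pod['metadata']['annotations']['kubernetes.io/created-by'])
            if created_by['reference']['kind'] == 'ReplicaSet':
                template_hash = pod['metadata']['labels']['pod-template-hash']
                return created_by['reference']['name'].replace(
                    '-' + template_hash, '')
        except (KeyError, ValueError):
            # Pod lacks the annotation/label, or the annotation isn't JSON.
            pass
        return None

    # Hand-made example pod mimicking the kubelet API payload shape.
    pod = {
        'metadata': {
            'annotations': {
                'kubernetes.io/created-by': json.dumps(
                    {'reference': {'kind': 'ReplicaSet',
                                   'name': 'heapster-v1.1.0-3141592653'}}),
            },
            'labels': {'pod-template-hash': '3141592653'},
        },
    }
    assert deployment_name_from_pod(pod) == 'heapster-v1.1.0'

Pods owned by other controllers (DaemonSet, Job, a bare ReplicationController) fall
through to None, which is why extract_kube_labels only appends the kube_deployment
tag when a name is found.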