Skip to content

Commit

Permalink
Updating Kubernetes check to handle multiple namespaces fix #2838
Browse files Browse the repository at this point in the history
The configuration key 'namespace' is renamed 'namespaces' so multiple
names can be specified (if present the old key will be added to the
'namespaces' list).

A new configuration key 'namespace_name_regexp' is added to allow
namespaces selection using regexp.

We now pull every event from Kubernetes and filter them against the
namespace list.
  • Loading branch information
hush-hush committed Nov 18, 2016
1 parent 3314956 commit 69debde
Show file tree
Hide file tree
Showing 6 changed files with 226 additions and 13 deletions.
48 changes: 42 additions & 6 deletions checks.d/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
'network.??_bytes',
'cpu.*.total']
DEFAULT_COLLECT_EVENTS = False
DEFAULT_NAMESPACES = ['default']

NET_ERRORS = ['rx_errors', 'tx_errors', 'rx_dropped', 'tx_dropped']

Expand Down Expand Up @@ -90,6 +91,15 @@ def __init__(self, name, init_config, agentConfig, instances=None):
if not self.kubeutil.host:
raise Exception('Unable to retrieve Docker hostname and host parameter is not set')

self.k8s_namespace_regexp = None
if inst:
regexp = inst.get('namespace_name_regexp', None)
if regexp:
try:
self.k8s_namespace_regexp = re.compile(regexp)
except re.error as e:
self.log.warning('Invalid regexp for "namespace_name_regexp" in configuration (ignoring regexp): %s' % str(e))

def _perform_kubelet_checks(self, url):
service_check_base = NAMESPACE + '.kubelet.check'
is_ok = True
Expand Down Expand Up @@ -423,16 +433,38 @@ def _process_events(self, instance, pods_list):
node_ip, node_name = self.kubeutil.get_node_info()
self.log.debug('Processing events on {} [{}]'.format(node_name, node_ip))

k8s_namespace = instance.get('namespace', 'default')
events_endpoint = '{}/namespaces/{}/events'.format(self.kubeutil.kubernetes_api_url, k8s_namespace)
k8s_namespaces = instance.get('namespaces', DEFAULT_NAMESPACES)
if not isinstance(k8s_namespaces, list):
self.log.warning('Configuration key "namespaces" is not a list: fallback to the default value')
k8s_namespaces = DEFAULT_NAMESPACES

# handle old config value
if 'namespace' in instance and instance.get('namespace') not in (None, 'default'):
self.log.warning('''The 'namespace' parameter is deprecated and will stop being supported starting '''
'''from 5.12. Please use 'namespaces' and/or 'namespace_name_regexp' instead.''')
k8s_namespaces.append(instance.get('namespace'))

if self.k8s_namespace_regexp:
namespaces_endpoint = '{}/namespaces'.format(self.kubeutil.kubernetes_api_url)
self.log.debug('Kubernetes API endpoint to query namespaces: %s' % namespaces_endpoint)

namespaces = self.kubeutil.retrieve_json_auth(namespaces_endpoint, self.kubeutil.get_auth_token())
for namespace in namespaces.get('items', []):
name = namespace.get('metadata', {}).get('name', None)
if name and self.k8s_namespace_regexp.match(name):
k8s_namespaces.append(name)

k8s_namespaces = set(k8s_namespaces)

events_endpoint = '{}/events'.format(self.kubeutil.kubernetes_api_url)
self.log.debug('Kubernetes API endpoint to query events: %s' % events_endpoint)

events = self.kubeutil.retrieve_json_auth(events_endpoint, self.kubeutil.get_auth_token())
event_items = events.get('items') or []
last_read = self.kubeutil.last_event_collection_ts[k8s_namespace]
last_read = self.kubeutil.last_event_collection_ts
most_recent_read = 0

self.log.debug('Found {} events, filtering out using timestamp: {}'.format(len(event_items), last_read))
self.log.debug('Found {} events, filtering out using timestamp: {} and namespaces: {}'.format(len(event_items), last_read, k8s_namespaces))

for event in event_items:
# skip if the event is too old
Expand All @@ -442,6 +474,10 @@ def _process_events(self, instance, pods_list):

involved_obj = event.get('involvedObject', {})

# filter events by whitelisted namespaces (an empty namespace belongs to the 'default' one)
if involved_obj.get('namespace', 'default') not in k8s_namespaces:
continue

tags = self.kubeutil.extract_event_tags(event)

# compute the most recently seen event, without relying on items order
Expand All @@ -467,5 +503,5 @@ def _process_events(self, instance, pods_list):
self.event(dd_event)

if most_recent_read > 0:
self.kubeutil.last_event_collection_ts[k8s_namespace] = most_recent_read
self.log.debug('_last_event_collection_ts is now {}'.format(most_recent_read))
self.kubeutil.last_event_collection_ts = most_recent_read
self.log.debug('last_event_collection_ts is now {}'.format(most_recent_read))
16 changes: 12 additions & 4 deletions conf.d/kubernetes.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,18 @@ instances:
# collect_events: false
#
#
# The namespace for which events should be collected.
# If not modified, the default namespace will be used.
# The namespaces for which events should be collected.
# If not modified, the 'default' namespace will be used.
#
# namespace: default
# namespaces:
# - default

# The regexp used to select namespaces for which events should be collected.
# The matched namespaces will be added to the "namespaces" list.
# If empty, regexp selection will be ignored.
#
# namespace_name_regexp:


# use_histogram controls whether we send detailed metrics, i.e. one per container.
# When false, we send detailed metrics corresponding to individual containers, tagging by container id
Expand All @@ -42,4 +50,4 @@ instances:
# - network.*
#
# enabled_gauges:
# - filesystem.*
# - filesystem.*
29 changes: 28 additions & 1 deletion tests/checks/fixtures/kubernetes/events.json
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,33 @@
"lastTimestamp": "2016-05-27T16:37:13Z",
"count": 1,
"type": "Normal"
},
{
"metadata": {
"name": "dd-agent-a769.148751928c4f601b",
"namespace": "test-namespace-1",
"selfLink": "/api/v1/namespaces/test-namespace-1/events/dd-agent-a769.148751928c4f601b",
"uid": "b3aff766-ab6f-11e6-819b-42010a84006e",
"resourceVersion": "2432",
"creationTimestamp": "2016-11-15T20:11:32Z"
},
"involvedObject": {
"kind": "DaemonSet",
"namespace": "test-namespace-1",
"name": "dd-agent-a769",
"uid": "8469b0d3-a769-11e6-b048-42010a84006e",
"apiVersion": "extensions",
"resourceVersion": "2835032"
},
"reason": "SuccessfulDelete",
"message": "Deleted pod: dd-agent-a769-zbdic",
"source": {
"component": "daemon-set"
},
"firstTimestamp": "2016-11-15T20:11:32Z",
"lastTimestamp": "2016-11-15T20:11:32Z",
"count": 1,
"type": "Normal"
}
]
}
}
87 changes: 87 additions & 0 deletions tests/checks/fixtures/kubernetes/namespaces.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
{
"kind": "NamespaceList",
"apiVersion": "v1",
"metadata": {
"selfLink": "/api/v1/namespaces",
"resourceVersion": "2841873"
},
"items": [
{
"metadata": {
"name": "default",
"selfLink": "/api/v1/namespaces/default",
"uid": "7f1a0d0c-65f3-11e6-b5c9-42010a840043",
"resourceVersion": "6",
"creationTimestamp": "2016-08-19T09:58:36Z"
},
"spec": {
"finalizers": [
"kubernetes"
]
},
"status": {
"phase": "Active"
}
},
{
"metadata": {
"name": "kube-system",
"selfLink": "/api/v1/namespaces/kube-system",
"uid": "7f239026-65f3-11e6-b5c9-42010a840043",
"resourceVersion": "46",
"creationTimestamp": "2016-08-19T09:58:36Z",
"annotations": {
"kubectl.kubernetes.io/last-applied-configuration": "{\"kind\":\"Namespace\",\"apiVersion\":\"v1\",\"metadata\":{\"name\":\"kube-system\",\"creationTimestamp\":null},\"spec\":{},\"status\":{}}"
}
},
"spec": {
"finalizers": [
"kubernetes"
]
},
"status": {
"phase": "Active"
}
},
{
"metadata": {
"name": "test-namespace-1",
"selfLink": "/api/v1/namespaces/test-namespace-1",
"uid": "473b5f59-a769-11e6-b048-42010a84006e",
"resourceVersion": "2152956",
"creationTimestamp": "2016-11-10T17:15:28Z",
"labels": {
"name": "test-namespace-1"
}
},
"spec": {
"finalizers": [
"kubernetes"
]
},
"status": {
"phase": "Active"
}
},
{
"metadata": {
"name": "test-namespace-2",
"selfLink": "/api/v1/namespaces/test-namespace-2",
"uid": "d4973ec1-a769-11e6-b048-42010a84006e",
"resourceVersion": "2153046",
"creationTimestamp": "2016-11-10T17:19:26Z",
"labels": {
"name": "test-namespace-2"
}
},
"spec": {
"finalizers": [
"kubernetes"
]
},
"status": {
"phase": "Active"
}
}
]
}
57 changes: 56 additions & 1 deletion tests/checks/mock/test_kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@
]


def KubeUtil_fake_retrieve_json_auth(url, auth_token, timeout=10):
    """Stand-in for KubeUtil.retrieve_json_auth that serves JSON fixtures.

    The fixture is chosen from the end of ``url``: a URL ending in
    "/namespaces" returns the parsed ``namespaces.json`` fixture, a URL
    ending in "/events" returns the parsed ``events.json`` fixture, and any
    other URL returns an empty dict.

    ``auth_token`` and ``timeout`` are accepted so the signature matches the
    calls made by the check, but they are not used.
    """
    # Order matters only if a URL could match several suffixes; the first
    # matching entry wins, exactly like a chain of `if` statements.
    fixture_by_suffix = (
        ("/namespaces", "namespaces.json"),
        ("/events", "events.json"),
    )
    for suffix, fixture_name in fixture_by_suffix:
        if url.endswith(suffix):
            return json.loads(Fixtures.read_file(fixture_name, string_escape=False))
    return {}

class TestKubernetes(AgentCheckTest):

CHECK_NAME = 'kubernetes'
Expand Down Expand Up @@ -295,7 +302,7 @@ def test_historate_1_2(self, *args):
@mock.patch('utils.kubernetes.KubeUtil.filter_pods_list',
side_effect=lambda x, y: x)
@mock.patch('utils.kubernetes.KubeUtil.retrieve_json_auth',
side_effect=lambda x,y: json.loads(Fixtures.read_file("events.json", string_escape=False)))
side_effect=KubeUtil_fake_retrieve_json_auth)
@mock.patch('utils.kubernetes.KubeUtil.retrieve_machine_info')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_metrics')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_pods_list',
Expand All @@ -311,10 +318,58 @@ def test_events(self, *args):
self.run_check(config, force_reload=True)
self.assertEvent('hello-node-47289321-91tfd Scheduled on Bar', count=1, exact_match=False)

# with no namespaces, only catch event from 'default'
self.assertEvent('dd-agent-a769 SuccessfulDelete on Bar', count=0, exact_match=False)

# again, now the timestamp is set and the event is discarded b/c too old
self.run_check(config)
self.assertEvent('hello-node-47289321-91tfd Scheduled on Bar', count=0, exact_match=False)

@mock.patch('utils.kubernetes.KubeUtil.get_node_info',
            side_effect=lambda: ('Foo', 'Bar'))
@mock.patch('utils.kubernetes.KubeUtil.filter_pods_list')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_json_auth',
            side_effect=KubeUtil_fake_retrieve_json_auth)
@mock.patch('utils.kubernetes.KubeUtil.retrieve_machine_info')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_metrics')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_pods_list')
def test_namespaced_events(self, *args):
    """Check event filtering for each way of selecting namespaces.

    Every scenario runs the check once with event collection enabled and
    then asserts how many times two events are reported: one whose involved
    object is in 'test-namespace-1' and one expected from the 'default'
    namespace.
    """
    other_ns_event = 'dd-agent-a769 SuccessfulDelete on Bar'
    default_ns_event = 'hello-node-47289321-91tfd Scheduled on Bar'

    # (namespace-related instance settings, expected count of the
    #  'test-namespace-1' event, expected count of the 'default' event)
    scenarios = [
        # retro-compatibility with the old 'namespace' configuration key
        ({'namespace': 'test-namespace-1'}, 1, 1),
        # explicit 'namespaces' list
        ({'namespaces': ['test-namespace-1', 'test-namespace-2']}, 1, 0),
        # 'namespace_name_regexp' only: 'namespaces' is not set, so it should
        # fall back to ['default'] plus every namespace matched by the regexp
        ({'namespace_name_regexp': 'test-namespace.*'}, 1, 1),
        # an empty 'namespaces' list mutes the 'default' namespace
        ({'namespaces': [], 'namespace_name_regexp': 'test-namespace.*'}, 1, 0),
    ]

    for ns_settings, expected_other, expected_default in scenarios:
        # reset last event pulling time
        KubeUtil().last_event_collection_ts = 0

        instance = {'host': 'bar', 'collect_events': True}
        instance.update(ns_settings)
        self.run_check({'instances': [instance]}, force_reload=True)

        self.assertEvent(other_ns_event, count=expected_other, exact_match=False)
        self.assertEvent(default_ns_event, count=expected_default, exact_match=False)

class TestKubeutil(unittest.TestCase):
def setUp(self):
Expand Down
2 changes: 1 addition & 1 deletion utils/kubernetes/kubeutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def __init__(self, instance=None):

# keep track of the latest k8s event we collected and posted
# default value is 0 but TTL for k8s events is one hour anyways
self.last_event_collection_ts = defaultdict(int)
self.last_event_collection_ts = 0

def get_kube_labels(self, excluded_keys=None):
pods = self.retrieve_pods_list()
Expand Down

0 comments on commit 69debde

Please sign in to comment.