Skip to content

Commit

Permalink
Merge pull request #3028 from DataDog/maxime/kubernetes-namespaces
Browse files Browse the repository at this point in the history
Updating Kubernetes check to handle multiple namespaces fix #2838
  • Loading branch information
hush-hush committed Nov 21, 2016
2 parents 3314956 + 69debde commit 2ae4b17
Show file tree
Hide file tree
Showing 6 changed files with 226 additions and 13 deletions.
48 changes: 42 additions & 6 deletions checks.d/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
'network.??_bytes',
'cpu.*.total']
DEFAULT_COLLECT_EVENTS = False
DEFAULT_NAMESPACES = ['default']

NET_ERRORS = ['rx_errors', 'tx_errors', 'rx_dropped', 'tx_dropped']

Expand Down Expand Up @@ -90,6 +91,15 @@ def __init__(self, name, init_config, agentConfig, instances=None):
if not self.kubeutil.host:
raise Exception('Unable to retrieve Docker hostname and host parameter is not set')

self.k8s_namespace_regexp = None
if inst:
regexp = inst.get('namespace_name_regexp', None)
if regexp:
try:
self.k8s_namespace_regexp = re.compile(regexp)
except re.error as e:
self.log.warning('Invalid regexp for "namespace_name_regexp" in configuration (ignoring regexp): %s' % str(e))

def _perform_kubelet_checks(self, url):
service_check_base = NAMESPACE + '.kubelet.check'
is_ok = True
Expand Down Expand Up @@ -423,16 +433,38 @@ def _process_events(self, instance, pods_list):
node_ip, node_name = self.kubeutil.get_node_info()
self.log.debug('Processing events on {} [{}]'.format(node_name, node_ip))

k8s_namespace = instance.get('namespace', 'default')
events_endpoint = '{}/namespaces/{}/events'.format(self.kubeutil.kubernetes_api_url, k8s_namespace)
k8s_namespaces = instance.get('namespaces', DEFAULT_NAMESPACES)
if not isinstance(k8s_namespaces, list):
self.log.warning('Configuration key "namespaces" is not a list: fallback to the default value')
k8s_namespaces = DEFAULT_NAMESPACES

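# handle old config value
# NOTE(review): k8s_namespaces may be the module-level DEFAULT_NAMESPACES list itself (or the
# list object stored in the instance config), so the append() calls below mutate it in place
# and the additions persist across runs -- consider copying first, e.g. list(k8s_namespaces).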
if 'namespace' in instance and instance.get('namespace') not in (None, 'default'):
self.log.warning('''The 'namespace' parameter is deprecated and will stop being supported starting '''
'''from 5.12. Please use 'namespaces' and/or 'namespace_name_regexp' instead.''')
k8s_namespaces.append(instance.get('namespace'))

if self.k8s_namespace_regexp:
namespaces_endpoint = '{}/namespaces'.format(self.kubeutil.kubernetes_api_url)
self.log.debug('Kubernetes API endpoint to query namespaces: %s' % namespaces_endpoint)

namespaces = self.kubeutil.retrieve_json_auth(namespaces_endpoint, self.kubeutil.get_auth_token())
for namespace in namespaces.get('items', []):
name = namespace.get('metadata', {}).get('name', None)
if name and self.k8s_namespace_regexp.match(name):
k8s_namespaces.append(name)

k8s_namespaces = set(k8s_namespaces)

events_endpoint = '{}/events'.format(self.kubeutil.kubernetes_api_url)
self.log.debug('Kubernetes API endpoint to query events: %s' % events_endpoint)

events = self.kubeutil.retrieve_json_auth(events_endpoint, self.kubeutil.get_auth_token())
event_items = events.get('items') or []
last_read = self.kubeutil.last_event_collection_ts[k8s_namespace]
last_read = self.kubeutil.last_event_collection_ts
most_recent_read = 0

self.log.debug('Found {} events, filtering out using timestamp: {}'.format(len(event_items), last_read))
self.log.debug('Found {} events, filtering out using timestamp: {} and namespaces: {}'.format(len(event_items), last_read, k8s_namespaces))

for event in event_items:
# skip if the event is too old
Expand All @@ -442,6 +474,10 @@ def _process_events(self, instance, pods_list):

involved_obj = event.get('involvedObject', {})

# filter events by whitelisted namespaces (an event without a namespace belongs to the 'default' one)
if involved_obj.get('namespace', 'default') not in k8s_namespaces:
continue

tags = self.kubeutil.extract_event_tags(event)

# compute the most recently seen event, without relying on items order
Expand All @@ -467,5 +503,5 @@ def _process_events(self, instance, pods_list):
self.event(dd_event)

if most_recent_read > 0:
self.kubeutil.last_event_collection_ts[k8s_namespace] = most_recent_read
self.log.debug('_last_event_collection_ts is now {}'.format(most_recent_read))
self.kubeutil.last_event_collection_ts = most_recent_read
self.log.debug('last_event_collection_ts is now {}'.format(most_recent_read))
16 changes: 12 additions & 4 deletions conf.d/kubernetes.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,18 @@ instances:
# collect_events: false
#
#
# The namespace for which events should be collected.
# If not modified, the default namespace will be used.
# The namespaces for which events should be collected.
# If not modified, the 'default' namespace will be used.
#
# namespace: default
# namespaces:
# - default

# The regexp used to select namespaces for which events should be collected.
# The matched namespaces will be added to the "namespaces" list.
# If empty, regexp selection will be ignored.
#
# namespace_name_regexp:


# use_histogram controls whether we send detailed metrics, i.e. one per container.
# When false, we send detailed metrics corresponding to individual containers, tagging by container id
Expand All @@ -42,4 +50,4 @@ instances:
# - network.*
#
# enabled_gauges:
# - filesystem.*
# - filesystem.*
29 changes: 28 additions & 1 deletion tests/checks/fixtures/kubernetes/events.json
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,33 @@
"lastTimestamp": "2016-05-27T16:37:13Z",
"count": 1,
"type": "Normal"
},
{
"metadata": {
"name": "dd-agent-a769.148751928c4f601b",
"namespace": "test-namespace-1",
"selfLink": "/api/v1/namespaces/test-namespace-1/events/dd-agent-a769.148751928c4f601b",
"uid": "b3aff766-ab6f-11e6-819b-42010a84006e",
"resourceVersion": "2432",
"creationTimestamp": "2016-11-15T20:11:32Z"
},
"involvedObject": {
"kind": "DaemonSet",
"namespace": "test-namespace-1",
"name": "dd-agent-a769",
"uid": "8469b0d3-a769-11e6-b048-42010a84006e",
"apiVersion": "extensions",
"resourceVersion": "2835032"
},
"reason": "SuccessfulDelete",
"message": "Deleted pod: dd-agent-a769-zbdic",
"source": {
"component": "daemon-set"
},
"firstTimestamp": "2016-11-15T20:11:32Z",
"lastTimestamp": "2016-11-15T20:11:32Z",
"count": 1,
"type": "Normal"
}
]
}
}
87 changes: 87 additions & 0 deletions tests/checks/fixtures/kubernetes/namespaces.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
{
"kind": "NamespaceList",
"apiVersion": "v1",
"metadata": {
"selfLink": "/api/v1/namespaces",
"resourceVersion": "2841873"
},
"items": [
{
"metadata": {
"name": "default",
"selfLink": "/api/v1/namespaces/default",
"uid": "7f1a0d0c-65f3-11e6-b5c9-42010a840043",
"resourceVersion": "6",
"creationTimestamp": "2016-08-19T09:58:36Z"
},
"spec": {
"finalizers": [
"kubernetes"
]
},
"status": {
"phase": "Active"
}
},
{
"metadata": {
"name": "kube-system",
"selfLink": "/api/v1/namespaces/kube-system",
"uid": "7f239026-65f3-11e6-b5c9-42010a840043",
"resourceVersion": "46",
"creationTimestamp": "2016-08-19T09:58:36Z",
"annotations": {
"kubectl.kubernetes.io/last-applied-configuration": "{\"kind\":\"Namespace\",\"apiVersion\":\"v1\",\"metadata\":{\"name\":\"kube-system\",\"creationTimestamp\":null},\"spec\":{},\"status\":{}}"
}
},
"spec": {
"finalizers": [
"kubernetes"
]
},
"status": {
"phase": "Active"
}
},
{
"metadata": {
"name": "test-namespace-1",
"selfLink": "/api/v1/namespaces/test-namespace-1",
"uid": "473b5f59-a769-11e6-b048-42010a84006e",
"resourceVersion": "2152956",
"creationTimestamp": "2016-11-10T17:15:28Z",
"labels": {
"name": "test-namespace-1"
}
},
"spec": {
"finalizers": [
"kubernetes"
]
},
"status": {
"phase": "Active"
}
},
{
"metadata": {
"name": "test-namespace-2",
"selfLink": "/api/v1/namespaces/test-namespace-2",
"uid": "d4973ec1-a769-11e6-b048-42010a84006e",
"resourceVersion": "2153046",
"creationTimestamp": "2016-11-10T17:19:26Z",
"labels": {
"name": "test-namespace-2"
}
},
"spec": {
"finalizers": [
"kubernetes"
]
},
"status": {
"phase": "Active"
}
}
]
}
57 changes: 56 additions & 1 deletion tests/checks/mock/test_kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@
]


def KubeUtil_fake_retrieve_json_auth(url, auth_token, timeout=10):
    """Fake ``KubeUtil.retrieve_json_auth`` that answers from JSON fixtures.

    The fixture is chosen from the end of ``url``: ``/namespaces`` is served
    from ``namespaces.json`` and ``/events`` from ``events.json``. Any other
    URL yields an empty dict.

    ``auth_token`` and ``timeout`` are accepted only so the signature is
    compatible with the patched method; neither is used.
    """
    fixture_by_suffix = (
        ("/namespaces", "namespaces.json"),
        ("/events", "events.json"),
    )
    for suffix, fixture_name in fixture_by_suffix:
        if url.endswith(suffix):
            return json.loads(Fixtures.read_file(fixture_name, string_escape=False))
    return {}

class TestKubernetes(AgentCheckTest):

CHECK_NAME = 'kubernetes'
Expand Down Expand Up @@ -295,7 +302,7 @@ def test_historate_1_2(self, *args):
@mock.patch('utils.kubernetes.KubeUtil.filter_pods_list',
side_effect=lambda x, y: x)
@mock.patch('utils.kubernetes.KubeUtil.retrieve_json_auth',
side_effect=lambda x,y: json.loads(Fixtures.read_file("events.json", string_escape=False)))
side_effect=KubeUtil_fake_retrieve_json_auth)
@mock.patch('utils.kubernetes.KubeUtil.retrieve_machine_info')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_metrics')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_pods_list',
Expand All @@ -311,10 +318,58 @@ def test_events(self, *args):
self.run_check(config, force_reload=True)
self.assertEvent('hello-node-47289321-91tfd Scheduled on Bar', count=1, exact_match=False)

# with no namespaces, only catch event from 'default'
self.assertEvent('dd-agent-a769 SuccessfulDelete on Bar', count=0, exact_match=False)

# again, now the timestamp is set and the event is discarded b/c too old
self.run_check(config)
self.assertEvent('hello-node-47289321-91tfd Scheduled on Bar', count=0, exact_match=False)

@mock.patch('utils.kubernetes.KubeUtil.get_node_info',
            side_effect=lambda: ('Foo', 'Bar'))
@mock.patch('utils.kubernetes.KubeUtil.filter_pods_list')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_json_auth',
            side_effect=KubeUtil_fake_retrieve_json_auth)
@mock.patch('utils.kubernetes.KubeUtil.retrieve_machine_info')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_metrics')
@mock.patch('utils.kubernetes.KubeUtil.retrieve_pods_list')
def test_namespaced_events(self, *args):
    """Check event filtering for each namespace-selection configuration.

    Every scenario resets the last event collection timestamp, runs the
    check with ``collect_events`` enabled plus the scenario's namespace
    settings, and then asserts how many times two fixture events are
    reported: the 'dd-agent-a769' SuccessfulDelete event (namespace
    'test-namespace-1' in the events fixture) and the 'hello-node'
    Scheduled event (presumably in the 'default' namespace -- its fixture
    entry is not visible here).
    """
    deleted_title = 'dd-agent-a769 SuccessfulDelete on Bar'
    scheduled_title = 'hello-node-47289321-91tfd Scheduled on Bar'

    # (extra instance settings, expected deleted_title count, expected scheduled_title count)
    scenarios = (
        # Verify that we are retro compatible with the old 'namespace' configuration key
        ({'namespace': 'test-namespace-1'}, 1, 1),
        # Using 'namespaces' list
        ({'namespaces': ['test-namespace-1', 'test-namespace-2']}, 1, 0),
        # Using 'namespace_name_regexp': since 'namespaces' is not set, it should
        # fall back to ['default'] and add any namespace matched by the regexp
        ({'namespace_name_regexp': 'test-namespace.*'}, 1, 1),
        # muting the 'default' namespace
        ({'namespaces': [], 'namespace_name_regexp': 'test-namespace.*'}, 1, 0),
    )

    for extra_settings, deleted_count, scheduled_count in scenarios:
        # reset last event pulling time
        KubeUtil().last_event_collection_ts = 0

        instance = {'host': 'bar', 'collect_events': True}
        instance.update(extra_settings)
        self.run_check({'instances': [instance]}, force_reload=True)

        self.assertEvent(deleted_title, count=deleted_count, exact_match=False)
        self.assertEvent(scheduled_title, count=scheduled_count, exact_match=False)

class TestKubeutil(unittest.TestCase):
def setUp(self):
Expand Down
2 changes: 1 addition & 1 deletion utils/kubernetes/kubeutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def __init__(self, instance=None):

# keep track of the latest k8s event we collected and posted
# default value is 0 but TTL for k8s events is one hour anyways
self.last_event_collection_ts = defaultdict(int)
self.last_event_collection_ts = 0

def get_kube_labels(self, excluded_keys=None):
pods = self.retrieve_pods_list()
Expand Down

0 comments on commit 2ae4b17

Please sign in to comment.