Fixing issue with multiple interfaces of brick and peer probe

tendrl-bug-id: #814
bugzilla: 1573075
Signed-off-by: GowthamShanmugasundaram <gshanmug@redhat.com>
Tendrl · May 17, 2018 · daccbf2 · daccbf2
1 parent d213e3e
commit daccbf2
Show file tree

Hide file tree

Showing 6 changed files with 156 additions and 64 deletions.
diff --git a/...ode_agent/monitoring/collectd/collectors/gluster/heavy_weight/tendrl_gluster_heal_info.py b/...ode_agent/monitoring/collectd/collectors/gluster/heavy_weight/tendrl_gluster_heal_info.py
@@ -23,7 +23,8 @@ def _parse_heal_info_stats(tree, integration_id, etcd_client):
         brick_host = tendrl_glusterfs_utils.find_brick_host(
             etcd_client, integration_id, brick_host
         )
-
+        if not brick_host:
+            continue
         try:
             no_of_entries = int(brick.find("numberOfEntries").text)
         except ValueError:

diff --git a/...gent/monitoring/collectd/collectors/gluster/heavy_weight/tendrl_glusterfs_profile_info.py b/...gent/monitoring/collectd/collectors/gluster/heavy_weight/tendrl_glusterfs_profile_info.py
@@ -275,6 +275,8 @@ def process_volume_profile_info(self, volume):
             brick_host = tendrl_glusterfs_utils.find_brick_host(
                 self.etcd_client, self.CONFIG['integration_id'], brick_host
             )
+            if not brick_host:
+                continue
             t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.iops." \
                 "gauge-read"
             self.profile_info[

diff --git a/...t/monitoring/collectd/collectors/gluster/low_weight/tendrl_glusterfs_brick_utilization.py b/...t/monitoring/collectd/collectors/gluster/low_weight/tendrl_glusterfs_brick_utilization.py
@@ -1,4 +1,5 @@
 import collectd
+import etcd
 import os
 import shlex
 import socket
@@ -7,16 +8,46 @@
 import threading
 import traceback
 
-
 from tendrl_gluster import TendrlGlusterfsMonitoringBase
 
+import utils as tendrl_glusterfs_utils
+
 
 class TendrlBrickUtilizationPlugin(
     TendrlGlusterfsMonitoringBase
 ):
+    etcd_client = {}
+
     def __init__(self):
         self.provisioner_only_plugin = False
         TendrlGlusterfsMonitoringBase.__init__(self)
+        if not self.etcd_client:
+            _etcd_args = dict(
+                host=self.CONFIG['etcd_host'],
+                port=int(self.CONFIG['etcd_port'])
+            )
+            etcd_ca_cert_file = self.CONFIG.get("etcd_ca_cert_file")
+            etcd_cert_file = self.CONFIG.get("etcd_cert_file")
+            etcd_key_file = self.CONFIG.get("etcd_key_file")
+            if (
+                etcd_ca_cert_file and
+                str(etcd_ca_cert_file) != "" and
+                etcd_cert_file and
+                str(etcd_cert_file) != "" and
+                etcd_key_file and
+                str(etcd_key_file) != ""
+            ):
+                _etcd_args.update(
+                    {
+                        "ca_cert": str(self.CONFIG['etcd_ca_cert_file']),
+                        "cert": (
+                            str(self.CONFIG['etcd_cert_file']),
+                            str(self.CONFIG['etcd_key_file'])
+                        ),
+                        "protocol": "https"
+                    }
+                )
+            self.etcd_client = etcd.Client(**_etcd_args)
 
     def _get_mount_point(self, path):
         mount = os.path.realpath(path)
@@ -220,23 +251,29 @@ def get_brick_utilization(self):
                 {}
             ).iteritems():
                 for brick in sub_volume_bricks:
-                    brick_hostname = brick['hostname']
                     # Check if current brick is from localhost else utilization
                     # of brick from some other host can't be computed here..
-                    if (
-                        socket.gethostbyname(brick_hostname) ==
-                        socket.gethostbyname(
-                            self.CONFIG['peer_name']
-                        )
-                    ):
-                        thread = threading.Thread(
-                            target=self.calc_brick_utilization,
-                            args=(volume['name'], brick,)
-                        )
-                        thread.start()
-                        threads.append(
-                            thread
-                        )
+                    brick_hostname = tendrl_glusterfs_utils.find_brick_host(
+                        self.etcd_client,
+                        self.CONFIG['integration_id'],
+                        brick['hostname']
+                    )
+                    if brick_hostname:
+                        brick_ip = socket.gethostbyname(brick_hostname)
+                        if (
+                            brick_ip == socket.gethostbyname(
+                                self.CONFIG['peer_name']
+                            ) or
+                            brick_hostname == self.CONFIG['peer_name']
+                        ):
+                            thread = threading.Thread(
+                                target=self.calc_brick_utilization,
+                                args=(volume['name'], brick,)
+                            )
+                            thread.start()
+                            threads.append(
+                                thread
+                            )
         for thread in threads:
             thread.join(1)
         for thread in threads:

diff --git a/...ent/monitoring/collectd/collectors/gluster/low_weight/tendrl_glusterfs_health_counters.py b/...ent/monitoring/collectd/collectors/gluster/low_weight/tendrl_glusterfs_health_counters.py
@@ -1,4 +1,5 @@
 import collectd
+import etcd
 import socket
 import traceback
 
@@ -14,10 +15,40 @@
 class TendrlGlusterfsHealthCounters(
     TendrlGlusterfsMonitoringBase
 ):
+    etcd_client = {}
+
     def __init__(self):
         self.provisioner_only_plugin = False
         TendrlGlusterfsMonitoringBase.__init__(self)
 
+        if not self.etcd_client:
+            _etcd_args = dict(
+                host=self.CONFIG['etcd_host'],
+                port=int(self.CONFIG['etcd_port'])
+            )
+            etcd_ca_cert_file = self.CONFIG.get("etcd_ca_cert_file")
+            etcd_cert_file = self.CONFIG.get("etcd_cert_file")
+            etcd_key_file = self.CONFIG.get("etcd_key_file")
+            if (
+                etcd_ca_cert_file and
+                str(etcd_ca_cert_file) != "" and
+                etcd_cert_file and
+                str(etcd_cert_file) != "" and
+                etcd_key_file and
+                str(etcd_key_file) != ""
+            ):
+                _etcd_args.update(
+                    {
+                        "ca_cert": str(self.CONFIG['etcd_ca_cert_file']),
+                        "cert": (
+                            str(self.CONFIG['etcd_cert_file']),
+                            str(self.CONFIG['etcd_key_file'])
+                        ),
+                        "protocol": "https"
+                    }
+                )
+            self.etcd_client = etcd.Client(**_etcd_args)
+
     def _get_rebalance_info(self):
         ret_val = {}
         volumes = self.CLUSTER_TOPOLOGY.get('volumes', [])
@@ -54,24 +85,33 @@ def get_metrics(self):
                     {}
                 ).iteritems():
                     for brick in sub_volume_bricks:
-                        brick_ip = socket.gethostbyname(brick.get('hostname'))
-                        if (
-                            brick_ip == socket.gethostbyname(
-                                self.CONFIG['peer_name']
-                            ) or
-                            brick.get('hostname') == self.CONFIG['peer_name']
-                        ):
-                            brick_found_for_curr_node = True
-                            # Push brick client connections
-                            ret_val[
-                                'clusters.%s.volumes.%s.nodes.%s.bricks.%s.'
-                                'connections_count' % (
-                                    self.CONFIG['integration_id'],
-                                    volume.get('name', ''),
-                                    self.CONFIG['peer_name'].replace('.', '_'),
-                                    brick['path'].replace('/', '|')
-                                )
-                            ] = brick['connections_count']
+                        brick_hostname = \
+                            tendrl_glusterfs_utils.find_brick_host(
+                                self.etcd_client,
+                                self.CONFIG['integration_id'],
+                                brick.get('hostname')
+                            )
+                        if brick_hostname:
+                            brick_ip = socket.gethostbyname(brick_hostname)
+                            if (
+                                brick_ip == socket.gethostbyname(
+                                    self.CONFIG['peer_name']
+                                ) or
+                                brick_hostname == self.CONFIG['peer_name']
+                            ):
+                                brick_found_for_curr_node = True
+                                # Push brick client connections
+                                ret_val[
+                                    'clusters.%s.volumes.%s.nodes.%s.'
+                                    'bricks.%s.'
+                                    'connections_count' % (
+                                        self.CONFIG['integration_id'],
+                                        volume.get('name', ''),
+                                        self.CONFIG['peer_name'].replace(
+                                            '.', '_'),
+                                        brick['path'].replace('/', '|')
+                                    )
+                                ] = brick['connections_count']
                 if brick_found_for_curr_node:
                     # Update rebalance info only for this volumes
                     volumes_list.append(volume.get('name', ''))

diff --git a/tendrl/node_agent/monitoring/collectd/collectors/gluster/tendrl_gluster_brick_disk_stats.py b/tendrl/node_agent/monitoring/collectd/collectors/gluster/tendrl_gluster_brick_disk_stats.py
@@ -542,23 +542,29 @@ def get_metrics(self):
                 []
             ).iteritems():
                 for brick in sub_volume_bricks:
-                    brick_ip = socket.gethostbyname(brick['hostname'])
-                    if (
-                        brick_ip == curr_host_ip or
-                        brick['hostname'] == self.CONFIG['peer_name']
-                    ):
-                        thread = threading.Thread(
-                            target=self.populate_disk_details,
-                            args=(
-                                volume['name'],
-                                self.CONFIG['peer_name'],
-                                brick['path'],
+                    brick_hostname = gluster_utils.find_brick_host(
+                        self.etcd_client,
+                        self.CONFIG['integration_id'],
+                        brick['hostname']
+                    )
+                    if brick_hostname:
+                        brick_ip = socket.gethostbyname(brick_hostname)
+                        if (
+                            brick_ip == curr_host_ip or
+                            brick_hostname == self.CONFIG['peer_name']
+                        ):
+                            thread = threading.Thread(
+                                target=self.populate_disk_details,
+                                args=(
+                                    volume['name'],
+                                    self.CONFIG['peer_name'],
+                                    brick['path'],
+                                )
+                            )
+                            thread.start()
+                            threads.append(
+                                thread
                             )
-                        )
-                        thread.start()
-                        threads.append(
-                            thread
-                        )
         for thread in threads:
             thread.join(1)
         for thread in threads:

diff --git a/tendrl/node_agent/monitoring/collectd/collectors/gluster/utils.py b/tendrl/node_agent/monitoring/collectd/collectors/gluster/utils.py
@@ -333,22 +333,28 @@ def find_brick_host(etcd_client, integration_id, brick_host):
     if etcd_client:
         try:
             int_id = integration_id
+            ip = socket.gethostbyname(brick_host)
+            node_id = etcd_client.read("indexes/ip/%s" % ip).value
             _key = "indexes/tags/tendrl/integration/%s" % int_id
             all_nodes = etcd_client.read(_key).value
             all_nodes = json.loads(all_nodes)
-            for node_id in all_nodes:
-                fqdn = "/nodes/%s/NodeContext/fqdn" % node_id
-                fqdn = etcd_client.read(fqdn).value
-
-                if brick_host in fqdn:
-                    return fqdn
-
-                ip = "/nodes/%s/NodeContext/ipv4_addr" % node_id
-                ip = etcd_client.read(ip).value
-                if brick_host in ip:
-                    return fqdn
-
-        except (urllib3.exceptions.TimeoutError, etcd.EtcdKeyNotFound):
+            if node_id in all_nodes:
+                _key = "/clusters/%s/nodes/%s/NodeContext/data" % (
+                    int_id, node_id
+                )
+                data = etcd_client.read(_key).value
+                data = json.loads(data)
+                if data.get("is_managed", None) == "yes":
+                    if data.get("fqdn", None):
+                        return data["fqdn"]
+                    elif data.get("ipv4_addr", None):
+                        return data["ipv4_addr"]
+        except (
+            urllib3.exceptions.TimeoutError,
+            etcd.EtcdKeyNotFound,
+            TypeError
+        ):
             _msg = "Error finding fqdn/ip for brick %s" % brick_host
             collectd.warning(_msg)
             collectd.warning(traceback.format_exc())
+    return None