Skip to content

Commit

Permalink
Fixing issue with multiple interfaces of brick and peer probe
Browse files Browse the repository at this point in the history
tendrl-bug-id: #814
bugzilla: 1573075

Signed-off-by: GowthamShanmugasundaram <gshanmug@redhat.com>
  • Loading branch information
GowthamShanmugam committed May 17, 2018
1 parent d213e3e commit daccbf2
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 64 deletions.
Expand Up @@ -23,7 +23,8 @@ def _parse_heal_info_stats(tree, integration_id, etcd_client):
brick_host = tendrl_glusterfs_utils.find_brick_host(
etcd_client, integration_id, brick_host
)

if not brick_host:
continue
try:
no_of_entries = int(brick.find("numberOfEntries").text)
except ValueError:
Expand Down
Expand Up @@ -275,6 +275,8 @@ def process_volume_profile_info(self, volume):
brick_host = tendrl_glusterfs_utils.find_brick_host(
self.etcd_client, self.CONFIG['integration_id'], brick_host
)
if not brick_host:
continue
t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.iops." \
"gauge-read"
self.profile_info[
Expand Down
@@ -1,4 +1,5 @@
import collectd
import etcd
import os
import shlex
import socket
Expand All @@ -7,16 +8,46 @@
import threading
import traceback


from tendrl_gluster import TendrlGlusterfsMonitoringBase

import utils as tendrl_glusterfs_utils


class TendrlBrickUtilizationPlugin(
TendrlGlusterfsMonitoringBase
):
etcd_client = {}

def __init__(self):
    """Initialise the plugin and make sure an etcd client is available.

    Marks the plugin as not provisioner-only, runs the base-class
    initialiser and then, unless ``self.etcd_client`` is already truthy,
    builds an ``etcd.Client`` from ``self.CONFIG``:

    * ``etcd_host`` / ``etcd_port`` are always used (port is cast to int).
    * When ``etcd_ca_cert_file``, ``etcd_cert_file`` and ``etcd_key_file``
      are all set to non-empty values, the client is additionally given
      ``ca_cert``, a ``(cert, key)`` tuple as ``cert`` and
      ``protocol="https"``.

    Raises:
        KeyError: if ``etcd_host`` or ``etcd_port`` is missing from
            ``self.CONFIG`` (plain subscript access).
    """
    self.provisioner_only_plugin = False
    TendrlGlusterfsMonitoringBase.__init__(self)

    if self.etcd_client:
        # A usable client is already present; nothing to build.
        return

    # NOTE(review): the assignment at the end of this method creates an
    # instance attribute; the class-level ``etcd_client = {}`` default is
    # left untouched, so every new instance builds its own client.
    # Confirm whether sharing one client across instances was intended.
    etcd_args = {
        "host": self.CONFIG['etcd_host'],
        "port": int(self.CONFIG['etcd_port']),
    }

    ca_cert_file = self.CONFIG.get("etcd_ca_cert_file")
    cert_file = self.CONFIG.get("etcd_cert_file")
    key_file = self.CONFIG.get("etcd_key_file")

    # Truthiness already excludes both None and the empty string, so no
    # separate comparison against "" is needed. TLS is enabled only when
    # all three files are configured.
    if ca_cert_file and cert_file and key_file:
        etcd_args.update(
            {
                "ca_cert": str(ca_cert_file),
                "cert": (str(cert_file), str(key_file)),
                "protocol": "https"
            }
        )

    self.etcd_client = etcd.Client(**etcd_args)

def _get_mount_point(self, path):
mount = os.path.realpath(path)
Expand Down Expand Up @@ -220,23 +251,29 @@ def get_brick_utilization(self):
{}
).iteritems():
for brick in sub_volume_bricks:
brick_hostname = brick['hostname']
# Check if current brick is from localhost else utilization
# of brick from some other host can't be computed here..
if (
socket.gethostbyname(brick_hostname) ==
socket.gethostbyname(
self.CONFIG['peer_name']
)
):
thread = threading.Thread(
target=self.calc_brick_utilization,
args=(volume['name'], brick,)
)
thread.start()
threads.append(
thread
)
brick_hostname = tendrl_glusterfs_utils.find_brick_host(
self.etcd_client,
self.CONFIG['integration_id'],
brick['hostname']
)
if brick_hostname:
brick_ip = socket.gethostbyname(brick_hostname)
if (
brick_ip == socket.gethostbyname(
self.CONFIG['peer_name']
) or
brick_hostname == self.CONFIG['peer_name']
):
thread = threading.Thread(
target=self.calc_brick_utilization,
args=(volume['name'], brick,)
)
thread.start()
threads.append(
thread
)
for thread in threads:
thread.join(1)
for thread in threads:
Expand Down
@@ -1,4 +1,5 @@
import collectd
import etcd
import socket
import traceback

Expand All @@ -14,10 +15,40 @@
class TendrlGlusterfsHealthCounters(
TendrlGlusterfsMonitoringBase
):
etcd_client = {}

def __init__(self):
    """Initialise the health-counter plugin and its etcd client.

    Sets ``provisioner_only_plugin`` to False, calls the base-class
    initialiser and, if ``self.etcd_client`` is still falsy, creates an
    ``etcd.Client`` from the ``etcd_host`` and ``etcd_port`` entries of
    ``self.CONFIG``. If ``etcd_ca_cert_file``, ``etcd_cert_file`` and
    ``etcd_key_file`` are all configured with non-empty values, the
    client is created with ``ca_cert``, ``cert=(cert, key)`` and
    ``protocol="https"``.

    Raises:
        KeyError: if ``etcd_host`` or ``etcd_port`` is missing from
            ``self.CONFIG`` (plain subscript access).
    """
    self.provisioner_only_plugin = False
    TendrlGlusterfsMonitoringBase.__init__(self)

    if self.etcd_client:
        # Client already available; skip construction.
        return

    # NOTE(review): this method assigns ``self.etcd_client`` on the
    # instance, which does not update the class-level ``etcd_client = {}``
    # default; each instance therefore constructs its own client.
    # Confirm whether a shared client was intended.
    connection_args = {
        "host": self.CONFIG['etcd_host'],
        "port": int(self.CONFIG['etcd_port']),
    }

    ca_cert_file = self.CONFIG.get("etcd_ca_cert_file")
    cert_file = self.CONFIG.get("etcd_cert_file")
    key_file = self.CONFIG.get("etcd_key_file")

    # A truthy value is by definition neither None nor "", so a single
    # truthiness test per setting is sufficient. All three must be set
    # before switching the client to https.
    if ca_cert_file and cert_file and key_file:
        connection_args.update(
            {
                "ca_cert": str(ca_cert_file),
                "cert": (str(cert_file), str(key_file)),
                "protocol": "https"
            }
        )

    self.etcd_client = etcd.Client(**connection_args)

def _get_rebalance_info(self):
ret_val = {}
volumes = self.CLUSTER_TOPOLOGY.get('volumes', [])
Expand Down Expand Up @@ -54,24 +85,33 @@ def get_metrics(self):
{}
).iteritems():
for brick in sub_volume_bricks:
brick_ip = socket.gethostbyname(brick.get('hostname'))
if (
brick_ip == socket.gethostbyname(
self.CONFIG['peer_name']
) or
brick.get('hostname') == self.CONFIG['peer_name']
):
brick_found_for_curr_node = True
# Push brick client connections
ret_val[
'clusters.%s.volumes.%s.nodes.%s.bricks.%s.'
'connections_count' % (
self.CONFIG['integration_id'],
volume.get('name', ''),
self.CONFIG['peer_name'].replace('.', '_'),
brick['path'].replace('/', '|')
)
] = brick['connections_count']
brick_hostname = \
tendrl_glusterfs_utils.find_brick_host(
self.etcd_client,
self.CONFIG['integration_id'],
brick.get('hostname')
)
if brick_hostname:
brick_ip = socket.gethostbyname(brick_hostname)
if (
brick_ip == socket.gethostbyname(
self.CONFIG['peer_name']
) or
brick_hostname == self.CONFIG['peer_name']
):
brick_found_for_curr_node = True
# Push brick client connections
ret_val[
'clusters.%s.volumes.%s.nodes.%s.'
'bricks.%s.'
'connections_count' % (
self.CONFIG['integration_id'],
volume.get('name', ''),
self.CONFIG['peer_name'].replace(
'.', '_'),
brick['path'].replace('/', '|')
)
] = brick['connections_count']
if brick_found_for_curr_node:
# Update rebalance info only for this volume
volumes_list.append(volume.get('name', ''))
Expand Down
Expand Up @@ -542,23 +542,29 @@ def get_metrics(self):
[]
).iteritems():
for brick in sub_volume_bricks:
brick_ip = socket.gethostbyname(brick['hostname'])
if (
brick_ip == curr_host_ip or
brick['hostname'] == self.CONFIG['peer_name']
):
thread = threading.Thread(
target=self.populate_disk_details,
args=(
volume['name'],
self.CONFIG['peer_name'],
brick['path'],
brick_hostname = gluster_utils.find_brick_host(
self.etcd_client,
self.CONFIG['integration_id'],
brick['hostname']
)
if brick_hostname:
brick_ip = socket.gethostbyname(brick_hostname)
if (
brick_ip == curr_host_ip or
brick_hostname == self.CONFIG['peer_name']
):
thread = threading.Thread(
target=self.populate_disk_details,
args=(
volume['name'],
self.CONFIG['peer_name'],
brick['path'],
)
)
thread.start()
threads.append(
thread
)
)
thread.start()
threads.append(
thread
)
for thread in threads:
thread.join(1)
for thread in threads:
Expand Down
32 changes: 19 additions & 13 deletions tendrl/node_agent/monitoring/collectd/collectors/gluster/utils.py
Expand Up @@ -333,22 +333,28 @@ def find_brick_host(etcd_client, integration_id, brick_host):
if etcd_client:
try:
int_id = integration_id
ip = socket.gethostbyname(brick_host)
node_id = etcd_client.read("indexes/ip/%s" % ip).value
_key = "indexes/tags/tendrl/integration/%s" % int_id
all_nodes = etcd_client.read(_key).value
all_nodes = json.loads(all_nodes)
for node_id in all_nodes:
fqdn = "/nodes/%s/NodeContext/fqdn" % node_id
fqdn = etcd_client.read(fqdn).value

if brick_host in fqdn:
return fqdn

ip = "/nodes/%s/NodeContext/ipv4_addr" % node_id
ip = etcd_client.read(ip).value
if brick_host in ip:
return fqdn

except (urllib3.exceptions.TimeoutError, etcd.EtcdKeyNotFound):
if node_id in all_nodes:
_key = "/clusters/%s/nodes/%s/NodeContext/data" % (
int_id, node_id
)
data = etcd_client.read(_key).value
data = json.loads(data)
if data.get("is_managed", None) == "yes":
if data.get("fqdn", None):
return data["fqdn"]
elif data.get("ipv4_addr", None):
return data["ipv4_addr"]
except (
urllib3.exceptions.TimeoutError,
etcd.EtcdKeyNotFound,
TypeError
):
_msg = "Error finding fqdn/ip for brick %s" % brick_host
collectd.warning(_msg)
collectd.warning(traceback.format_exc())
return None

0 comments on commit daccbf2

Please sign in to comment.