-
Notifications
You must be signed in to change notification settings - Fork 0
/
sd-cluster-check
executable file
·302 lines (270 loc) · 13.6 KB
/
sd-cluster-check
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
#!/usr/bin/env python
"""sd-cluster-check
Script for determining information about High availability clusters on RHEL/CentOS
from extracted sosreports.
WARNING: This is not production ready yet. So far this is a proof of concept
and code is a mess.
__author__ = "Ondrej Famera"
__email__ = "ofamera@redhat.com"
__license__ = "GPLv3"
__status__ = "Proof-of-Concept"
"""
import os.path
import re
# xml parsing
import xml.etree.ElementTree as ET
# clusters detected in the processed sosreports, keyed by cluster name
clusters = {}
class bcolors:
    """Terminal escape sequences used to colorize the printed report."""

    # colors
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # sequence appended after colored text in the report
    ENDC = '\033[0m'

    # text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
### parsing input paths
import getopt
import sys
import subprocess

try:
    opts, args = getopt.getopt(sys.argv[1:], "hv", [])
except getopt.GetoptError:
    print("error parsing arguments")
    sys.exit(2)

# -v: also list the sosreport paths per node in the summary
# -h: print the usage text and exit
verbose = False
show_help = False
for o, a in opts:
    if o == '-v':
        verbose = True
    elif o == '-h':
        show_help = True

print("== sd-cluster-check v0.3 ==")

# Show the usage when it was requested explicitly or when there is no path
# to work on (this also covers the plain invocation without any argument).
if show_help or not args:
    print ("""
sd-cluster-check [-v] path/sos_report1 [path/sos_report2] ...
sd-cluster-check [-v] path/dir_with_sosreports1 [path/dir_with_sosreports2] ...
This script takes as parameters directory with unpacked sosreports or
directory contatining directories with unpacked sosreports.
You can provide multiple parameters with paths.
As output you will get basic information about HA clusters detected in
sosreports and information about the nodes for which the sosreports were provided.
Thanks to that you can tell from which nodes you are missing sosreports.
When the '-v' argument is passed the summary will contain path to sosreports to
given nodes (except of duplicates).
To request features or report bug you can create issue on github or email it.
https://github.com/OndrejHome/sd-cluster-check/issues
ofamera@redhat.com
""")
    sys.exit(0)

# Every directory that contains a 'sos_commands' subdirectory is treated as
# the root of one unpacked sosreport.
sosreport_list = []
print("Searching for sosreports in given path(s)")
for path in args:
    if not os.path.isdir(path):
        print("[e] path "+path+" is not a directory")
        continue
    print("path: "+path)
    try:
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed to 'find' unmodified.
        output = subprocess.check_output(
            ["find", path, "-maxdepth", "3", "-name", "sos_commands", "-type", "d"]
        )
    except subprocess.CalledProcessError as e:
        # use output even if we receive errors from 'find' command
        output = e.output
    except OSError as e:
        # 'find' itself could not be started; keep going with other paths
        print("[e] unable to run 'find' for path "+path+": "+str(e))
        output = b''
    for row in output.decode('utf-8').split('\n'):
        if row:
            # strip the trailing '/sos_commands' component
            sos_root = row.rpartition('/')[0]
            print('found '+sos_root)
            sosreport_list.append(sos_root)
###
print("[i] " + str(len(sosreport_list))+" sosreport(s) found, processing them now")
def check_ip ( sosreport_path, ip ):
    """Check if IP address is active on the system captured in a sosreport.

    The networking command outputs are consulted in a fixed order of
    preference ('ip -o addr', 'ip address', 'ifconfig -a'); only the first
    one that exists in the sosreport is searched.

    sosreport_path -- root directory of the unpacked sosreport
    ip             -- IPv4 address to look for (matched literally)

    Returns True when the address is found, False otherwise. When none of
    the networking outputs is present an error is printed and False is
    returned.
    """
    net_dir = sosreport_path + '/sos_commands/networking/'
    escaped_ip = re.escape(ip)
    # (file name, pattern that identifies the address in that file's format)
    candidates = (
        ('ip_-o_addr', r"^.*inet " + escaped_ip + r"/.*$"),
        ('ip_address', r"^.*inet " + escaped_ip + r"/.*$"),
        ('ifconfig_-a', r"^.*inet addr:" + escaped_ip + r"\s+.*$"),
    )
    for file_name, pattern in candidates:
        file_path = net_dir + file_name
        if not os.path.isfile(file_path):
            continue
        ip_match = re.compile(pattern)
        # 'with' guarantees the file is closed, also on the early return
        with open(file_path, 'r') as ifile:
            return any(ip_match.search(line) for line in ifile)
    print("[e] sosreport doesn't containt any networking information ('ifconfig' nor 'ip addr') - "+sosreport_path)
    return False
def parse_cluster_name ( sosreport_path ):
    """Determine the cluster name and cluster type from a sosreport.

    Looks at /etc/cluster/cluster.conf first and at
    /etc/corosync/corosync.conf second; a name found in corosync.conf
    replaces the one found in cluster.conf.

    sosreport_path -- root directory of the unpacked sosreport

    Returns a tuple (cluster_name, in_cluster_conf, in_corosync_conf,
    cluster_type):
      cluster_name     -- detected name, "" when none was found
      in_cluster_conf  -- True when cluster.conf provided a name
      in_corosync_conf -- True when corosync.conf provided a name
      cluster_type     -- "pacemaker", "rgmanager" or "unknown"
    """
    cluster_name = ""
    in_cluster_conf = False
    in_corosync_conf = False
    cluster_type = "unknown"

    cluster_conf_path = sosreport_path + '/etc/cluster/cluster.conf'
    if os.path.isfile(cluster_conf_path):
        try:
            root = ET.parse(cluster_conf_path).getroot()
        except ET.ParseError as err:
            # one damaged file must not abort processing of all sosreports
            print("[w] unable to parse " + cluster_conf_path + ": " + str(err))
            root = None
        if root is not None and 'name' in root.attrib:
            cluster_name = root.attrib['name']
            in_cluster_conf = True
            # a fence_pcmk fence device in cluster.conf marks a pacemaker cluster
            if root.findall('.//fencedevice[@agent="fence_pcmk"]'):
                cluster_type = "pacemaker"
            else:
                cluster_type = "rgmanager"

    corosync_conf_path = sosreport_path + '/etc/corosync/corosync.conf'
    if os.path.isfile(corosync_conf_path):
        with open(corosync_conf_path, 'r') as corosync_conf:
            conf_text = corosync_conf.read()
        cl_name = re.compile(r"cluster_name\s*:\s*([\w.-]+)\s*", re.M | re.S)
        match_name = cl_name.search(conf_text)
        if match_name:
            cluster_name = match_name.group(1)
            in_corosync_conf = True
            cluster_type = "pacemaker"

    return cluster_name, in_cluster_conf, in_corosync_conf, cluster_type
def parse_corosync_conf_nodes ( sosreport_path, cluster_nodes ):
    """Add the nodes listed in corosync.conf to cluster_nodes.

    Every 'node { ... }' section of /etc/corosync/corosync.conf that has
    both a 'ring0_addr' and a 'nodeid' entry is recorded. Node IDs already
    present in cluster_nodes are left untouched.

    sosreport_path -- root directory of the unpacked sosreport
    cluster_nodes  -- dict keyed by node ID (string); updated in place with
                      entries of the form {'name': ..., 'node_id': ...}
    """
    with open(sosreport_path + '/etc/corosync/corosync.conf', 'r') as corosync_conf:
        conf_text = corosync_conf.read()

    node_section = re.compile(r"node\s*\{([^}]+)\}", re.M | re.S)
    n_name = re.compile(r"ring0_addr\s*:\s*([\w.-]+)\s*", re.M)
    n_id = re.compile(r"nodeid\s*:\s*(\d+)\s*", re.M)

    for section in node_section.findall(conf_text):
        name_match = n_name.search(section)
        id_match = n_id.search(section)
        # sections lacking either value cannot be identified; skip them
        if not (name_match and id_match):
            continue
        node_id = id_match.group(1)
        if node_id not in cluster_nodes:
            cluster_nodes[node_id] = { 'name': name_match.group(1), 'node_id': node_id }
def parse_cluster_conf_nodes ( sosreport_path, cluster_nodes ):
    """Add the <clusternode> entries of cluster.conf to cluster_nodes.

    Only elements carrying both a 'nodeid' and a 'name' attribute are
    recorded, and node IDs already present in cluster_nodes are kept as
    they are. cluster_nodes is updated in place.
    """
    tree = ET.parse(sosreport_path+'/etc/cluster/cluster.conf')
    for element in tree.getroot().iterfind('.//clusternode'):
        attrs = element.attrib
        if 'nodeid' not in attrs or 'name' not in attrs:
            continue
        node_id = attrs['nodeid']
        if node_id in cluster_nodes:
            continue
        cluster_nodes[node_id] = { 'name': attrs['name'], 'node_id': node_id }
def _read_sos_hostname ( sosreport_path ):
    """Return the hostname stored in the sosreport, 'unknown' if unavailable."""
    for rel_path in ('/sos_commands/general/hostname', '/sos_commands/host/hostname'):
        hostname_path = sosreport_path + rel_path
        if os.path.isfile(hostname_path):
            with open(hostname_path, 'r') as hostname_file:
                # an empty file yields '' here instead of raising IndexError
                return hostname_file.readline().rstrip('\n') or 'unknown'
    return 'unknown'


def match_nodes_to_hosts ( sosreport_path, cluster_nodes ):
    """Resolve cluster node names to IPs and tie the sosreport to its node.

    For every node the IPv4 address is taken from the first line of the
    sosreport's /etc/hosts that mentions the node name; a node name that is
    itself an IPv4 address is used directly. The address is stored under
    cluster_nodes[id]['ip'], both per reporting hostname and under the
    '==global==' key (first value seen; a later differing value triggers a
    warning). When check_ip() finds the address active in this sosreport,
    the node is flagged with 'has_sos_report', 'has_crm_report' (if a
    crm_report directory named after the node exists) and the sosreport path
    is recorded under 'sosreports' keyed by the content of the 'date' file.

    sosreport_path -- root directory of the unpacked sosreport
    cluster_nodes  -- dict keyed by node ID, each value holding at least
                      'name'; updated in place
    """
    hostname = _read_sos_hostname(sosreport_path)

    # /etc/hosts is the same for every node, read it once
    hosts_path = sosreport_path + '/etc/hosts'
    hosts_lines = None
    if os.path.isfile(hosts_path):
        with open(hosts_path, 'r') as hosts_file:
            hosts_lines = hosts_file.readlines()

    node_as_ip_match = re.compile(r"^(?P<ip>\d+\.\d+\.\d+\.\d+)$")

    for node in cluster_nodes:
        node_info = cluster_nodes[node]
        node_name = node_info['name']

        if hosts_lines is None:
            print("[w] /etc/hosts not found in sosreport for node " + node)
            continue

        # the node name is matched literally (dots must not act as wildcards)
        host_match = re.compile(
            r"^(?P<ip>\d+\.\d+\.\d+\.\d+).*\s+" + re.escape(node_name) + r"(\s+.*|$)"
        )
        node_ip = ""
        # find first matching hostname with IP
        for line in hosts_lines:
            mymatch = host_match.search(line)
            if mymatch:
                node_ip = mymatch.group('ip')
                break
        # try checking if the node name is not just IPv4 address
        if not node_ip and node_as_ip_match.match(node_name):
            node_ip = node_name

        if not node_ip:
            print("[w] Not possible to determine cluster node IP from node name, no record in /etc/hosts! - "+node_name + " sosreport: "+sosreport_path)
            continue

        node_ips = node_info.setdefault('ip', {})
        if '==global==' not in node_ips:
            node_ips['==global=='] = node_ip
        elif node_ips['==global=='] != node_ip:
            print("[w] Inconsistent naming in /etc/hosts accross nodes detected !!!")
        node_ips[hostname] = node_ip

        ## checking if we can match IP to this host sosreport
        if not check_ip (sosreport_path, node_ip):
            continue
        node_info['has_sos_report'] = True
        for crm_dir in ('/sos_commands/cluster/crm_report/', '/sos_commands/pacemaker/crm_report/'):
            if os.path.isdir(sosreport_path + crm_dir + node_name):
                node_info['has_crm_report'] = True

        ## add the reference to sosreport to node from which it is
        sosreports = node_info.setdefault('sosreports', {})
        # determine sosreport date
        sosreport_date = 'unknown'
        date_path = sosreport_path + '/date'
        if os.path.isfile(date_path):
            with open(date_path, 'r') as date_file:
                sosreport_date = date_file.read().replace('\n','')
        else:
            print('[w] there is no "date" file to determine from which time is sosreport')

        if sosreport_date in sosreports:
            print('[w] seems that we have 2 sosreport from same node and with same time')
            print(' 1. '+sosreports[sosreport_date])
            print(' 2. '+sosreport_path)
        else:
            sosreports[sosreport_date] = sosreport_path
# Walk through all discovered sosreports and collect, per cluster name, the
# cluster type and the nodes (with their IPs and sosreport references).
for sosreport in sosreport_list:
    # warn if no /sos_commands/cluster directory is found - most probably this won't be a cluster
    if not (os.path.isdir(sosreport+'/sos_commands/cluster') or os.path.isdir(sosreport+'/sos_commands/pacemaker')):
        print("[w] sos_commands/cluster nor sos_commands/pacemaker are present in "+sosreport)

    # 1. try to determine cluster_name
    # -> this will also tell us if this is rgmanager or pacemaker cluster
    cluster_name, in_cluster_conf, in_corosync_conf, cluster_type = parse_cluster_name(sosreport)

    # Without a cluster name there is nothing to file this sosreport under.
    # This also covers an empty name attribute in cluster.conf, which would
    # otherwise lead to a lookup of a cluster entry that was never created.
    if not cluster_name:
        continue
    if cluster_name not in clusters:
        clusters[cluster_name] = { 'nodes': {}, 'type': cluster_type }
    cluster_nodes = clusters[cluster_name]['nodes']

    # 2. based on cluster type parse the node names and IDs
    if in_corosync_conf:
        # parse pacemaker nodes - this is most probably RHEL 7 system
        parse_corosync_conf_nodes(sosreport, cluster_nodes)
    if in_cluster_conf:
        # parse cluster.conf nodes (on RHEL 6 pacemaker node names
        # and IDs are in cluster.conf and corosync.conf doesn't exist there)
        parse_cluster_conf_nodes(sosreport, cluster_nodes)

    # 3. Once we have nodes, parse the /etc/hosts file to determine if we can tell from which
    # node this sosreport is (TODO note somewhere the look at IP addresses of other nodes
    # to check if we have a consistent look - detection of missing/misconfigured /etc/hosts)
    if in_cluster_conf or in_corosync_conf:
        match_nodes_to_hosts(sosreport, cluster_nodes)
    # 4. Assign any additional attributes to cluster from this sosreport
###################################################
# Summary: one table per detected cluster, one row per node ordered by
# numeric node ID.
print("============ detected " + str(len(clusters)) + " cluster(s)")
for cl_name, cl_data in clusters.items():
    cl_type = cl_data['type']
    print("==== " + bcolors.HEADER + cl_name + bcolors.ENDC + " ( " + cl_type + " )")

    # table header; the 'crm' column is printed for pacemaker clusters only
    if cl_type == 'pacemaker':
        print("{0:2s} {1:15s} {2:3s} {3:3s} {4:20s}".format('#','heartbeat IP', 'sos', 'crm', 'node name'))
    elif cl_type == 'rgmanager':
        print("{0:2s} {1:15s} {2:3s} {3:20s}".format('#','heartbeat IP', 'sos', 'node name'))

    cl_nodes = cl_data['nodes']
    for node_id in sorted(cl_nodes, key=int):
        node = cl_nodes[node_id]

        has_sos = (bcolors.OKGREEN + 'yes' if 'has_sos_report' in node else bcolors.FAIL + 'NO') + bcolors.ENDC
        has_crm = (bcolors.OKGREEN + 'yes' if 'has_crm_report' in node else bcolors.FAIL + 'NO') + bcolors.ENDC

        if 'ip' in node and '==global==' in node['ip']:
            show_ip = bcolors.OKCYAN + node['ip']['==global=='] + bcolors.ENDC
        else:
            show_ip = bcolors.WARNING + 'undetected' + bcolors.ENDC

        # column widths are larger than in the header because the values
        # carry the color escape sequences
        if cl_type == 'pacemaker':
            print("{0:2s} {1:24s} {2:13s} {3:13s} {4:15s}".format(node_id, show_ip, has_sos, has_crm, node['name']))
        elif cl_type == 'rgmanager':
            print("{0:2s} {1:24s} {2:13s} {3:15s}".format(node_id, show_ip, has_sos, node['name']))

        if not verbose:
            continue
        if 'sosreports' not in node:
            print('[ no sosreports for this node]')
            continue
        for sos_date, sos_path in node['sosreports'].items():
            print(' - '+sos_date+' - '+sos_path)