class Disk(object):
    """Collect disk-space and inode usage by running and parsing ``df``."""

    # Recognises numeric columns. ``match`` only anchors at the start of the
    # string, so a column merely has to *begin* with a digit to qualify.
    _NUMBERS = re.compile(r'([0-9]+)')

    def _parse_df(self, lines, inodes=False, use_volume=True, logger=None):
        """Multi-platform df output parser.

        lines      -- the df output, either as one string or as an iterable
                      of lines (e.g. the result of ``readlines()``). The
                      first line is assumed to be the column header.
        inodes     -- if True, extract inode counts instead of block counts.
        use_volume -- if True the volume rather than the mount point is used
                      to anchor the metric; if False the last column of the
                      row is used instead.
        logger     -- optional logger used to report unparsable rows; a
                      module-level ``logging`` logger is used when omitted.

        e.g. /dev/sda1 .... /my_mount
        _parse_df picks /dev/sda1 if use_volume, /my_mount if not

        Returns a list of rows. Each row is the whitespace-split df line in
        which columns 1, 2 and 3 have been replaced by integers: total, used
        and available. Rows that cannot be parsed are logged and skipped.
        """
        if hasattr(lines, "splitlines"):
            lines = lines.splitlines()

        usage_data = []
        # Volume name seen alone on the previous line (df wraps long names).
        previous = None

        # 1. skip the header line
        for line in list(lines)[1:]:
            parts = line.split()

            # skip empty lines
            if not parts:
                continue

            # 2. ditch fake volumes and collate wrapped volume names
            if len(parts) == 1:
                # volume on a line by itself, its numbers follow on the next
                previous = parts[0]
                continue

            # A pending volume name only ever applies to the very next row;
            # forget it now so it cannot be glued onto an unrelated row later.
            wrapped_volume, previous = previous, None

            if parts[0] == "none":
                # this is a "fake" volume (dev fs, etc.)
                continue
            if not self._NUMBERS.match(parts[1]):
                # this is a volume like "map auto_home"
                continue
            if wrapped_volume and self._NUMBERS.match(parts[0]):
                # collate with previous line
                parts.insert(0, wrapped_volume)

            # 3. anchor on the mount point if requested
            if not use_volume:
                parts[0] = parts[-1]

            # 4. extract the metrics; convert everything before touching the
            # row so a failure never leaves it half-converted
            try:
                if inodes:
                    if sys.platform == "darwin":
                        # Filesystem 512-blocks Used Available Capacity iused ifree %iused Mounted
                        # Inodes are in position 5, 6 and we need to compute the total
                        used = int(parts[5])
                        available = int(parts[6])
                        total = used + available
                    else:
                        total = int(parts[1])
                        used = int(parts[2])
                        available = int(parts[3])
                else:
                    # Integer-divide twice by 1024, exactly as before. With
                    # `df -k` input (1 KiB blocks) this yields GiB; the scale
                    # is kept for backward compatibility with consumers.
                    total, used, available = [
                        int(parts[i]) // 1024 // 1024 for i in (1, 2, 3)
                    ]
            except (IndexError, ValueError):
                if logger is None:
                    import logging
                    logger = logging.getLogger(__name__)
                logger.exception("Cannot parse %s", parts)
                continue

            parts[1:4] = [total, used, available]
            usage_data.append(parts)

        return usage_data

    def _run_df(self, flag):
        """Run ``df <flag>`` and return its standard output as text."""
        proc = subprocess.Popen(['df', flag],
                                stdout=subprocess.PIPE,
                                close_fds=True,
                                universal_newlines=True)
        # communicate() drains the pipe and waits for the child, so neither
        # the file descriptor nor a zombie process is left behind.
        return proc.communicate()[0]

    def check(self, logger, agentConfig):
        """Get disk space/inode stats.

        Returns a ``(disks, inodes)`` tuple of rows as produced by
        ``_parse_df``, or False if df could not be run or parsed.
        ``agentConfig`` is accepted for interface compatibility and unused.
        """
        # Check test_system for some examples of output
        try:
            disks = self._parse_df(self._run_df('-k'), logger=logger)
            inodes = self._parse_df(self._run_df('-i'), inodes=True,
                                    logger=logger)
            return (disks, inodes)
        except Exception:
            logger.exception('getDiskUsage')
            return False
map -hosts 0 0 0 100% /net - # so we just get rid of it - if re.match(regexp, volume[1]) == None: - - pass - - else: - try: - volume[2] = int(volume[2]) / 1024 / 1024 # Used - volume[3] = int(volume[3]) / 1024 / 1024 # Available - except IndexError: - logger.debug('getDiskUsage: parsing, loop IndexError - Used or Available not present') - - except KeyError: - logger.debug('getDiskUsage: parsing, loop KeyError - Used or Available not present') - - usageData.append(volume) - - logger.debug('getDiskUsage: completed, returning') - - return usageData class IO(object): @@ -161,11 +184,6 @@ def check(self, logger, agentConfig): # If Linux like procfs system is present and mounted we use loadavg, else we use uptime if sys.platform == 'linux2' or (sys.platform.find('freebsd') != -1 and self.linuxProcFsLocation != False): - if sys.platform == 'linux2': - logger.debug('getLoadAvrgs: linux2') - else: - logger.debug('getLoadAvrgs: freebsd (loadavg)') - try: logger.debug('getLoadAvrgs: attempting open') diff --git a/tests/test_system.py b/tests/test_system.py index c74805ec42..7b2642ab9f 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -14,5 +14,77 @@ def testCPU(self): # Make sure we sum up to 100% (or 99% in the case of macs) assert abs(reduce(lambda a,b:a+b, res.values(), 0) - 100) <= 1, res + def testDisk(self): + """Testing disk stats gathering""" + global logger + disk = Disk() + res = disk.check(logger, {}) + + lion_df_i = """Filesystem 512-blocks Used Available Capacity iused ifree %iused Mounted onto +/dev/disk1 487932936 220080040 267340896 46% 27574003 33417612 45% / +devfs 374 374 0 100% 648 0 100% /dev +map -hosts 0 0 0 100% 0 0 100% /net +map auto_home 0 0 0 100% 0 0 100% /home +localhost:/KJDS7Bgpbp1QglL9lBwOe6 487932936 487932936 0 100% 0 0 100% /Volumes/MobileBackups +/dev/disk2s1 62309376 5013120 57296256 9% 0 0 100% /Volumes/NO name""" + + lion_df_k = """Filesystem 1024-blocks Used Available Capacity Mounted onto +/dev/disk1 243966468 110040020 
133670448 46% / +devfs 187 187 0 100% /dev +map -hosts 0 0 0 100% /net +map auto_home 0 0 0 100% /home +localhost:/KJDS7Bgpbp1QglL9lBwOe6 243966468 243966468 0 100% /Volumes/MobileBackups +/dev/disk2s1 31154688 2506560 28648128 9% /Volumes/NO NAME""" + + linux_df_k = """Filesystem 1K-blocks Used Available Use% Mounted on +/dev/sda1 8256952 5600592 2236932 72% / +none 3802316 124 3802192 1% /dev +none 3943856 0 3943856 0% /dev/shm +none 3943856 148 3943708 1% /var/run +none 3943856 0 3943856 0% /var/lock +none 3943856 0 3943856 0% /lib/init/rw +/dev/sdb 433455904 305360 411132240 1% /mnt +/dev/sdf 52403200 40909112 11494088 79% /data +nfs:/abc/def/ghi/jkl/mno/pqr + 52403200 40909112 11494088 79% /data +/dev/sdg 52403200 40909112 11494088 79% /data +""" + + linux_df_i = """Filesystem Inodes IUsed IFree IUse% Mounted on +/dev/sda1 524288 171642 352646 33% / +none 950579 2019 948560 1% /dev +none 985964 1 985963 1% /dev/shm +none 985964 66 985898 1% /var/run +none 985964 3 985961 1% /var/lock +none 985964 1 985963 1% /lib/init/rw +/dev/sdb 27525120 147 27524973 1% /mnt +/dev/sdf 46474080 478386 45995694 2% /data +""" + + def testDfParser(self): + global logger + disk = Disk() + + import sys + sys.platform = 'darwin' + res = disk._parse_df(TestSystem.lion_df_k) + assert res[0][:4] == ["/dev/disk1", 243966468 / 1024 / 1024, 110040020 / 1024 / 1024, 133670448 / 1024 / 1024], res[0] + assert res[3][:4] == ["/dev/disk2s1", 31154688 / 1024 / 1024, 2506560 / 1024 / 1024, 28648128 / 1024 / 1024], res[3] + + res = disk._parse_df(TestSystem.lion_df_i, inodes = True) + assert res[0][:4] == ["/dev/disk1", 60991615, 27574003, 33417612], res[0] + + sys.platform = 'linux2' + res = disk._parse_df(TestSystem.linux_df_k) + assert res[0][:4] == ["/dev/sda1", 8256952 / 1024 / 1024, 5600592 / 1024 / 1024, 2236932 / 1024 / 1024], res[0] + assert res[-3][:4] == ["/dev/sdf", 52403200 / 1024 / 1024, 40909112 / 1024 / 1024, 11494088 / 1024 / 1024], res[-2] + assert res[-2][:4] == 
["nfs:/abc/def/ghi/jkl/mno/pqr", 52403200 / 1024 / 1024, 40909112 / 1024 / 1024, 11494088 / 1024 / 1024], res[-1] + assert res[-1][:4] == ["/dev/sdg", 52403200 / 1024 / 1024, 40909112 / 1024 / 1024, 11494088 / 1024 / 1024], res[-2] + + res = disk._parse_df(TestSystem.linux_df_i, inodes = True) + assert res[0][:4] == ["/dev/sda1", 524288, 171642, 352646], res[0] + assert res[1][:4] == ["/dev/sdb", 27525120, 147, 27524973], res[1] + assert res[2][:4] == ["/dev/sdf", 46474080, 478386, 45995694], res[2] + if __name__ == "__main__": unittest.main()