diff --git a/src/etc/htmldocck.py b/src/etc/htmldocck.py index 2acee8a97f59f..8362c239b655d 100644 --- a/src/etc/htmldocck.py +++ b/src/etc/htmldocck.py @@ -104,6 +104,7 @@ """ +from __future__ import print_function import sys import os.path import re @@ -160,8 +161,13 @@ def close(self): HTMLParser.close(self) return self.__builder.close() -Command = namedtuple('Command', 'negated cmd args lineno') +Command = namedtuple('Command', 'negated cmd args lineno context') +class FailedCheck(Exception): + pass + +class InvalidCheck(Exception): + pass def concat_multi_lines(f): """returns a generator out of the file object, which @@ -196,7 +202,7 @@ def concat_multi_lines(f): catenated = '' if lastline is not None: - raise RuntimeError('Trailing backslash in the end of file') + print_err(lineno, line, 'Trailing backslash at the end of the file') LINE_PATTERN = re.compile(r''' (?<=(?!?) @@ -216,9 +222,10 @@ def get_commands(template): cmd = m.group('cmd') args = m.group('args') if args and not args[:1].isspace(): - raise RuntimeError('Invalid template syntax at line {}'.format(lineno+1)) + print_err(lineno, line, 'Invalid template syntax') + continue args = shlex.split(args) - yield Command(negated=negated, cmd=cmd, args=args, lineno=lineno+1) + yield Command(negated=negated, cmd=cmd, args=args, lineno=lineno+1, context=line) def _flatten(node, acc): @@ -242,8 +249,7 @@ def normalize_xpath(path): elif path.startswith('.//'): return path else: - raise RuntimeError('Non-absolute XPath is not supported due to \ - the implementation issue.') + raise InvalidCheck('Non-absolute XPath is not supported due to implementation issues') class CachedFiles(object): @@ -259,41 +265,40 @@ def resolve_path(self, path): self.last_path = path return path elif self.last_path is None: - raise RuntimeError('Tried to use the previous path in the first command') + raise InvalidCheck('Tried to use the previous path in the first command') else: return self.last_path def get_file(self, path): path = self.resolve_path(path) - try: + if path in self.files: return self.files[path] - except KeyError: - try: - with open(os.path.join(self.root, path)) as f: - data = f.read() - except Exception as e: - raise RuntimeError('Cannot open file {!r}: {}'.format(path, e)) - else: - self.files[path] = data - return data + + abspath = os.path.join(self.root, path) + if not(os.path.exists(abspath) and os.path.isfile(abspath)): + raise FailedCheck('File does not exist {!r}'.format(path)) + + with open(abspath) as f: + data = f.read() + self.files[path] = data + return data def get_tree(self, path): path = self.resolve_path(path) - try: + if path in self.trees: return self.trees[path] - except KeyError: - try: - f = open(os.path.join(self.root, path)) - except Exception as e: - raise RuntimeError('Cannot open file {!r}: {}'.format(path, e)) + + abspath = os.path.join(self.root, path) + if not(os.path.exists(abspath) and os.path.isfile(abspath)): + raise FailedCheck('File does not exist {!r}'.format(path)) + + with open(abspath) as f: try: - with f: - tree = ET.parse(f, CustomHTMLParser()) + tree = ET.parse(f, CustomHTMLParser()) except Exception as e: raise RuntimeError('Cannot parse an HTML file {!r}: {}'.format(path, e)) - else: - self.trees[path] = tree - return self.trees[path] + self.trees[path] = tree + return self.trees[path] def check_string(data, pat, regexp): @@ -311,14 +316,14 @@ def check_tree_attr(tree, path, attr, pat, regexp): path = normalize_xpath(path) ret = False for e in tree.findall(path): - try: + if attr in e.attrib: value = e.attrib[attr] - except KeyError: - continue else: - ret = check_string(value, pat, regexp) - if ret: - break + continue + + ret = check_string(value, pat, regexp) + if ret: + break return ret @@ -341,57 +346,84 @@ def check_tree_count(tree, path, count): path = normalize_xpath(path) return len(tree.findall(path)) == count +def stderr(*args): + print(*args, file=sys.stderr) -def check(target, commands): - cache = CachedFiles(target) - for c in commands: +def print_err(lineno, context, err, message=None): + global ERR_COUNT + ERR_COUNT += 1 + stderr("{}: {}".format(lineno, message or err)) + if message and err: + stderr("\t{}".format(err)) + + if context: + stderr("\t{}".format(context)) + +ERR_COUNT = 0 + +def check_command(c, cache): + try: + cerr = "" if c.cmd == 'has' or c.cmd == 'matches': # string test regexp = (c.cmd == 'matches') if len(c.args) == 1 and not regexp: # @has = file existence try: cache.get_file(c.args[0]) ret = True - except RuntimeError: + except FailedCheck as err: + cerr = err.message ret = False elif len(c.args) == 2: # @has/matches = string test + cerr = "`PATTERN` did not match" ret = check_string(cache.get_file(c.args[0]), c.args[1], regexp) elif len(c.args) == 3: # @has/matches = XML tree test + cerr = "`XPATH PATTERN` did not match" tree = cache.get_tree(c.args[0]) pat, sep, attr = c.args[1].partition('/@') if sep: # attribute - ret = check_tree_attr(cache.get_tree(c.args[0]), pat, attr, c.args[2], regexp) + tree = cache.get_tree(c.args[0]) + ret = check_tree_attr(tree, pat, attr, c.args[2], regexp) else: # normalized text pat = c.args[1] if pat.endswith('/text()'): pat = pat[:-7] ret = check_tree_text(cache.get_tree(c.args[0]), pat, c.args[2], regexp) else: - raise RuntimeError('Invalid number of @{} arguments \ - at line {}'.format(c.cmd, c.lineno)) + raise InvalidCheck('Invalid number of @{} arguments'.format(c.cmd)) elif c.cmd == 'count': # count test if len(c.args) == 3: # @count = count test ret = check_tree_count(cache.get_tree(c.args[0]), c.args[1], int(c.args[2])) else: - raise RuntimeError('Invalid number of @{} arguments \ - at line {}'.format(c.cmd, c.lineno)) - + raise InvalidCheck('Invalid number of @{} arguments'.format(c.cmd)) elif c.cmd == 'valid-html': - raise RuntimeError('Unimplemented @valid-html at line {}'.format(c.lineno)) + raise InvalidCheck('Unimplemented @valid-html') elif c.cmd == 'valid-links': - raise RuntimeError('Unimplemented @valid-links at line {}'.format(c.lineno)) - + raise InvalidCheck('Unimplemented @valid-links') else: - raise RuntimeError('Unrecognized @{} at line {}'.format(c.cmd, c.lineno)) + raise InvalidCheck('Unrecognized @{}'.format(c.cmd)) if ret == c.negated: - raise RuntimeError('@{}{} check failed at line {}'.format('!' if c.negated else '', - c.cmd, c.lineno)) + raise FailedCheck(cerr) + + except FailedCheck as err: + message = '@{}{} check failed'.format('!' if c.negated else '', c.cmd) + print_err(c.lineno, c.context, err.message, message) + except InvalidCheck as err: + print_err(c.lineno, c.context, err.message) + +def check(target, commands): + cache = CachedFiles(target) + for c in commands: + check_command(c, cache) if __name__ == '__main__': - if len(sys.argv) < 3: - print >>sys.stderr, 'Usage: {}