From 6539f1e3433e77e23a33e293007f2da993c96abc Mon Sep 17 00:00:00 2001 From: James Graham Date: Thu, 21 May 2020 19:50:11 +0100 Subject: [PATCH] Fix a bunch of Python 3 issues in the manifest code The goal here is to get mypy passing in Python 3; prior to this patch there were a lot of cases where Python 3 behaviour was broken but not picked up by the tests (e.g. because it only happened when updating the manifest or with certain caches or similar). Many of these issues were seen by mypy, but that had been disabled for Py3 specifically to avoid having to fix all the issues at the time the annotations were added. But now we do need that support. The general approach here is to use the same types for Py 2 and Py 3. In particular we use Text where possible (with a couple of exceptions where it was easier not to), rather than bytes. The native str type is avoided wherever possible since this introduces difficult to debug differences between Python versions. The preference for Text includes paths, which means we no longer support running in non-unicode directories (in practice it is likely to be broken if the path isn't UTF-8 compatible on Unix or UTF-16 compatible on Windows). 
--- tools/gitignore/gitignore.py | 136 ++++++++-------- tools/gitignore/tests/test_gitignore.py | 93 ++++++----- tools/lint/fnmatch.py | 8 +- tools/lint/lint.py | 147 +++++++++--------- tools/lint/rules.py | 2 +- tools/lint/tests/test_lint.py | 20 +-- tools/localpaths.py | 4 +- tools/manifest/XMLParser.py | 30 ++-- tools/manifest/download.py | 21 ++- tools/manifest/item.py | 2 +- tools/manifest/manifest.py | 102 ++++++++---- tools/manifest/sourcefile.py | 66 ++++---- tools/manifest/utils.py | 58 +++---- tools/manifest/vcs.py | 69 ++++---- tools/tox.ini | 18 +-- tools/wpt/revlist.py | 17 +- tools/wpt/testfiles.py | 98 ++++++------ tools/wpt/tests/test_testfiles.py | 6 +- tools/wpt/utils.py | 10 +- .../wptrunner/wptrunner/tests/test_update.py | 21 ++- .../wptrunner/wptrunner/tests/test_wpttest.py | 16 +- 21 files changed, 492 insertions(+), 452 deletions(-) diff --git a/tools/gitignore/gitignore.py b/tools/gitignore/gitignore.py index 0f3d9450d12344..2a2761518663ca 100644 --- a/tools/gitignore/gitignore.py +++ b/tools/gitignore/gitignore.py @@ -1,7 +1,7 @@ import re import os import itertools -from six import itervalues, iteritems +from six import ensure_binary, itervalues, iteritems from collections import defaultdict MYPY = False @@ -26,70 +26,72 @@ def fnmatch_translate(pat): - # type: (str) -> Tuple[bool, Pattern[str]] + # type: (bytes) -> Tuple[bool, Pattern[bytes]] parts = [] seq = None i = 0 - any_char = "[^/]" - if pat[0] == "/": - parts.append("^") + any_char = b"[^/]" + if pat[0:1] == b"/": + parts.append(b"^") pat = pat[1:] else: # By default match the entire path up to a / # but if / doesn't appear in the pattern we will mark is as # a name pattern and just produce a pattern that matches against # the filename - parts.append("^(?:.*/)?") + parts.append(b"^(?:.*/)?") name_pattern = True - if pat[-1] == "/": + if pat[-1:] == b"/": # If the last character is / match this directory or any subdirectory pat = pat[:-1] - suffix = "(?:/|$)" + suffix = b"(?:/|$)" 
else: - suffix = "$" + suffix = b"$" while i < len(pat): - c = pat[i] - if c == "\\": + c = pat[i:i+1] + if c == b"\\": if i < len(pat) - 1: i += 1 - c = pat[i] + c = pat[i:i+1] parts.append(re.escape(c)) else: raise ValueError elif seq is not None: # TODO: this doesn't really handle invalid sequences in the right way - if c == "]": + if c == b"]": seq = None - if parts[-1] == "[": + if parts[-1:] == b"[": parts = parts[:-1] - elif parts[-1] == "^" and parts[-2] == "[": + elif parts[-1:] == b"^" and parts[-2:-1] == b"[": parts = parts[:-2] else: parts.append(c) - elif c == "-": + elif c == b"-": parts.append(c) + elif c == b"[": + raise ValueError else: - parts += re.escape(c) - elif c == "[": - parts.append("[") - if i < len(pat) - 1 and pat[i+1] in ("!", "^"): - parts.append("^") + parts.append(re.escape(c)) + elif c == b"[": + parts.append(b"[") + if i < len(pat) - 1 and pat[i+1:i+2] in (b"!", b"^"): + parts.append(b"^") i += 1 seq = i - elif c == "*": - if i < len(pat) - 1 and pat[i+1] == "*": - if i > 0 and pat[i-1] != "/": + elif c == b"*": + if i < len(pat) - 1 and pat[i+1:i+2] == b"*": + if i > 0 and pat[i-1:i] != b"/": raise ValueError - parts.append(".*") + parts.append(b".*") i += 1 - if i < len(pat) - 1 and pat[i+1] != "/": + if i < len(pat) - 1 and pat[i+1:i+2] != b"/": raise ValueError else: - parts.append(any_char + "*") - elif c == "?": + parts.append(any_char + b"*") + elif c == b"?": parts.append(any_char) - elif c == "/" and not seq: + elif c == b"/" and not seq: name_pattern = False parts.append(c) else: @@ -97,31 +99,31 @@ def fnmatch_translate(pat): i += 1 if name_pattern: - parts[0] = "^" + parts[0] = b"^" if seq is not None: raise ValueError parts.append(suffix) try: - return name_pattern, re.compile("".join(parts)) + return name_pattern, re.compile(b"".join(parts)) except Exception: raise ValueError # Regexp matching rules that have to be converted to patterns -pattern_re = re.compile(r".*[\*\[\?]") +pattern_re = re.compile(br".*[\*\[\?]") 
def parse_line(line): - # type: (str) -> Optional[Tuple[bool, bool, bool, Union[Tuple[str, ...], Tuple[bool, Pattern[str]]]]] + # type: (bytes) -> Optional[Tuple[bool, bool, bool, Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]]] line = line.rstrip() - if not line or line[0] == "#": + if not line or line[0:1] == b"#": return None - invert = line[0] == "!" + invert = line[0:1] == b"!" if invert: line = line[1:] - dir_only = line[-1] == "/" + dir_only = line[-1:] == b"/" if dir_only: line = line[:-1] @@ -129,7 +131,7 @@ def parse_line(line): # Could make a special case for **/foo, but we don't have any patterns like that if not invert and not pattern_re.match(line): literal = True - pattern = tuple(line.rsplit("/", 1)) # type: Union[Tuple[str, ...], Tuple[bool, Pattern[str]]] + pattern = tuple(line.rsplit(b"/", 1)) # type: Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]] else: pattern = fnmatch_translate(line) literal = False @@ -139,9 +141,9 @@ def parse_line(line): class PathFilter(object): def __init__(self, root, extras=None, cache=None): - # type: (str, Optional[List[str]], Optional[MutableMapping[str, bool]]) -> None + # type: (bytes, Optional[List[bytes]], Optional[MutableMapping[bytes, bool]]) -> None if root: - ignore_path = os.path.join(root, ".gitignore") # type: Optional[str] + ignore_path = os.path.join(root, b".gitignore") # type: Optional[bytes] else: ignore_path = None if not ignore_path and not extras: @@ -149,32 +151,32 @@ def __init__(self, root, extras=None, cache=None): return self.trivial = False - self.literals_file = defaultdict(dict) # type: Dict[Optional[str], Dict[str, List[Tuple[bool, Pattern[str]]]]] - self.literals_dir = defaultdict(dict) # type: Dict[Optional[str], Dict[str, List[Tuple[bool, Pattern[str]]]]] - self.patterns_file = [] # type: List[Tuple[Tuple[bool, Pattern[str]], List[Tuple[bool, Pattern[str]]]]] - self.patterns_dir = [] # type: List[Tuple[Tuple[bool, Pattern[str]], List[Tuple[bool, Pattern[str]]]]] - 
self.cache = cache or {} # type: MutableMapping[str, bool] + self.literals_file = defaultdict(dict) # type: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] + self.literals_dir = defaultdict(dict) # type: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] + self.patterns_file = [] # type: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] + self.patterns_dir = [] # type: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] + self.cache = cache or {} # type: MutableMapping[bytes, bool] if extras is None: extras = [] if ignore_path and os.path.exists(ignore_path): - args = ignore_path, extras # type: Tuple[Optional[str], List[str]] + args = ignore_path, extras # type: Tuple[Optional[bytes], List[bytes]] else: args = None, extras self._read_ignore(*args) def _read_ignore(self, ignore_path, extras): - # type: (Optional[str], List[str]) -> None + # type: (Optional[bytes], List[bytes]) -> None if ignore_path is not None: - with open(ignore_path) as f: + with open(ignore_path, "rb") as f: for line in f: self._read_line(line) for line in extras: self._read_line(line) def _read_line(self, line): - # type: (str) -> None + # type: (bytes) -> None parsed = parse_line(line) if not parsed: return @@ -186,13 +188,13 @@ def _read_line(self, line): # overriden by an exclude rule assert not literal if MYPY: - rule = cast(Tuple[bool, Pattern[str]], rule) + rule = cast(Tuple[bool, Pattern[bytes]], rule) if not dir_only: rules_iter = itertools.chain( itertools.chain(*(iteritems(item) for item in itervalues(self.literals_dir))), itertools.chain(*(iteritems(item) for item in itervalues(self.literals_file))), self.patterns_dir, - self.patterns_file) # type: Iterable[Tuple[Any, List[Tuple[bool, Pattern[str]]]]] + self.patterns_file) # type: Iterable[Tuple[Any, List[Tuple[bool, Pattern[bytes]]]]] else: rules_iter = itertools.chain( itertools.chain(*(iteritems(item) for item in 
itervalues(self.literals_dir))), @@ -203,9 +205,9 @@ def _read_line(self, line): else: if literal: if MYPY: - rule = cast(Tuple[str, ...], rule) + rule = cast(Tuple[bytes, ...], rule) if len(rule) == 1: - dir_name, pattern = None, rule[0] # type: Tuple[Optional[str], str] + dir_name, pattern = None, rule[0] # type: Tuple[Optional[bytes], bytes] else: dir_name, pattern = rule self.literals_dir[dir_name][pattern] = [] @@ -213,31 +215,31 @@ def _read_line(self, line): self.literals_file[dir_name][pattern] = [] else: if MYPY: - rule = cast(Tuple[bool, Pattern[str]], rule) + rule = cast(Tuple[bool, Pattern[bytes]], rule) self.patterns_dir.append((rule, [])) if not dir_only: self.patterns_file.append((rule, [])) def filter(self, - iterator # type: Iterable[Tuple[str, List[Tuple[str, T]], List[Tuple[str, T]]]] + iterator # type: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] ): - # type: (...) -> Iterable[Tuple[str, List[Tuple[str, T]], List[Tuple[str, T]]]] + # type: (...) 
-> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] empty = {} # type: Dict[Any, Any] for dirpath, dirnames, filenames in iterator: orig_dirpath = dirpath - if os.path.sep != "/": - dirpath = dirpath.replace(os.path.sep, "/") + if ensure_binary(os.path.sep) != b"/": + dirpath = dirpath.replace(ensure_binary(os.path.sep), b"/") - keep_dirs = [] # type: List[Tuple[str, T]] - keep_files = [] # type: List[Tuple[str, T]] + keep_dirs = [] # type: List[Tuple[bytes, T]] + keep_files = [] # type: List[Tuple[bytes, T]] for iter_items, literals, patterns, target, suffix in [ - (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, "/"), - (filenames, self.literals_file, self.patterns_file, keep_files, "")]: + (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, b"/"), + (filenames, self.literals_file, self.patterns_file, keep_files, b"")]: for item in iter_items: name = item[0] if dirpath: - path = "%s/%s" % (dirpath, name) + suffix + path = b"%s/%s" % (dirpath, name) + suffix else: path = name + suffix if path in self.cache: @@ -269,13 +271,13 @@ def filter(self, target.append(item) dirnames[:] = keep_dirs - assert not any(".git" == name for name, _ in dirnames) + assert not any(b".git" == name for name, _ in dirnames) yield orig_dirpath, dirnames, keep_files def __call__(self, - iterator # type: Iterable[Tuple[str, List[Tuple[str, T]], List[Tuple[str, T]]]] + iterator # type: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] ): - # type: (...) -> Iterable[Tuple[str, List[Tuple[str, T]], List[Tuple[str, T]]]] + # type: (...) 
-> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] if self.trivial: return iterator @@ -283,5 +285,5 @@ def __call__(self, def has_ignore(dirpath): - # type: (str) -> bool - return os.path.exists(os.path.join(dirpath, ".gitignore")) + # type: (bytes) -> bool + return os.path.exists(os.path.join(dirpath, b".gitignore")) diff --git a/tools/gitignore/tests/test_gitignore.py b/tools/gitignore/tests/test_gitignore.py index 8131a71875b4d5..317f3799b62bb2 100644 --- a/tools/gitignore/tests/test_gitignore.py +++ b/tools/gitignore/tests/test_gitignore.py @@ -10,62 +10,61 @@ from typing import Sequence match_data = [ - ("foo", True, ["a/foo", "foo"]), - ("*.a", True, ["foo.a", "a/foo.a", "a/b/foo.a", "a.a/foo.a"]), - ("*.py[co]", True, ["a.pyc", "a.pyo", "a/b/c.pyc"]), - ("\\#*", True, ["#a", "a/#b"]), - ("*#", True, ["a#", "a/b#", "#a#"]), - ("/*.c", True, ["a.c", ".c"]), - ("**/b", False, ["a/b", "a/c/b"]), - ("*b", True, ["ab"]), - ("*b", True, ["a/b"]), - ("**/b", False, ["a/b"]), - ("a/", True, ["a"]), - ("a[/]b", True, []), - ("**/b", False, ["a/c/b"]), - ("a?c", True, ["abc"]), - ("a[^b]c", True, ["acc"]), - ("a[b-c]c", True, ["abc", "acc"]), - ("a[^]c", True, ["ac"]), # This is probably wrong - ("a[^]c", True, ["ac"]), # This is probably wrong -] # type: Sequence[Tuple[str, bool, Iterable[str]]] + (b"foo", True, [b"a/foo", b"foo"]), + (b"*.a", True, [b"foo.a", b"a/foo.a", b"a/b/foo.a", b"a.a/foo.a"]), + (b"*.py[co]", True, [b"a.pyc", b"a.pyo", b"a/b/c.pyc"]), + (b"\\#*", True, [b"#a", b"a/#b"]), + (b"*#", True, [b"a#", b"a/b#", b"#a#"]), + (b"/*.c", True, [b"a.c", b".c"]), + (b"**/b", False, [b"a/b", b"a/c/b"]), + (b"*b", True, [b"ab"]), + (b"*b", True, [b"a/b"]), + (b"**/b", False, [b"a/b"]), + (b"a/", True, [b"a"]), + (b"a[/]b", True, []), + (b"**/b", False, [b"a/c/b"]), + (b"a?c", True, [b"abc"]), + (b"a[^b]c", True, [b"acc"]), + (b"a[b-c]c", True, [b"abc", b"acc"]), +] # type: Sequence[Tuple[bytes, bool, Iterable[bytes]]] mismatch_data = [ - 
("foo", True, ["foob", "afoo"]), - ("*.a", True, ["a", "foo:a", "a.a/foo"]), - ("*.py[co]", True, ["a.pyd", "pyo", "a.py"]), - ("a", True, ["ab"]), - ("a?c", True, ["ac", "abbc"]), - ("a[^b]c", True, ["abc"]), - ("a[b-c]c", True, ["adc"]), -] # type: Sequence[Tuple[str, bool, Iterable[str]]] + (b"foo", True, [b"foob", b"afoo"]), + (b"*.a", True, [b"a", b"foo:a", b"a.a/foo"]), + (b"*.py[co]", True, [b"a.pyd", b"pyo", b"a.py"]), + (b"a", True, [b"ab"]), + (b"a?c", True, [b"ac", b"abbc"]), + (b"a[^b]c", True, [b"abc"]), + (b"a[b-c]c", True, [b"adc"]), +] # type: Sequence[Tuple[bytes, bool, Iterable[bytes]]] invalid_data = [ - "[a", - "***/foo", - "a\\", - "**b", - "b**/", - "[[]" + b"[a", + b"***/foo", + b"a\\", + b"**b", + b"b**/", + b"[[]", + b"a[^]c", ] filter_data = [ - (["foo", "bar/", "/a", "*.py"], - [("", ["foo", "bar", "baz"], ["a"]), - ("baz", ["a"], ["foo", "bar"])], - [(["baz"], []), - (["a"], ["bar"])]), - (["#foo", "", "a*", "!a.py"], - [("", ["foo"], ["a", "a.foo", "a.py"])], - [(["foo"], ["a.py"])]), - (["a.foo", "!a.py"], - [("", ["foo"], ["a", "a.foo", "a.py"])], - [(["foo"], ["a", "a.py"])]), + ([b"foo", b"bar/", b"/a", b"*.py"], + [(b"", [b"foo", b"bar", b"baz"], [b"a"]), + (b"baz", [b"a"], [b"foo", b"bar"])], + [([b"baz"], []), + ([b"a"], [b"bar"])]), + ([b"#foo", b"", b"a*", b"!a.py"], + [(b"", [b"foo"], [b"a", b"a.foo", b"a.py"])], + [([b"foo"], [b"a.py"])]), + ([b"a.foo", b"!a.py"], + [(b"", [b"foo"], [b"a", b"a.foo", b"a.py"])], + [([b"foo"], [b"a", b"a.py"])]), ] def expand_data(compact_data): - # type: (Sequence[Tuple[str, bool, Iterable[str]]]) -> Iterable[Tuple[str, bool, str]] + # type: (Sequence[Tuple[bytes, bool, Iterable[bytes]]]) -> Iterable[Tuple[bytes, bool, bytes]] for pattern, name_only, inputs in compact_data: for input in inputs: yield pattern, name_only, input @@ -76,7 +75,7 @@ def tests_match(pattern, name_only, input): name_only_result, regexp = fnmatch_translate(pattern) assert name_only_result == name_only if name_only: - 
input = input.rsplit("/", 1)[-1] + input = input.rsplit(b"/", 1)[-1] assert regexp.match(input) is not None @@ -85,7 +84,7 @@ def tests_no_match(pattern, name_only, input): name_only_result, regexp = fnmatch_translate(pattern) assert name_only_result == name_only if name_only: - input = input.rsplit("/", 1)[-1] + input = input.rsplit(b"/", 1)[-1] assert regexp.match(input) is None diff --git a/tools/lint/fnmatch.py b/tools/lint/fnmatch.py index 3d9ee23c97e5ba..0c45029b23915f 100644 --- a/tools/lint/fnmatch.py +++ b/tools/lint/fnmatch.py @@ -6,23 +6,23 @@ MYPY = False if MYPY: # MYPY is set to True when run under Mypy. - from typing import AnyStr from typing import Iterable from typing import List + from typing import Text __all__ = ["fnmatch", "fnmatchcase", "filter", "translate"] def fnmatch(name, pat): - # type: (AnyStr, AnyStr) -> bool + # type: (Text, Text) -> bool name = os.path.normcase(name) pat = os.path.normcase(pat) return fnmatchcase(name, pat) def fnmatchcase(name, pat): - # type: (AnyStr, AnyStr) -> bool + # type: (Text, Text) -> bool if '?' 
not in pat and '[' not in pat: wildcards = pat.count("*") if wildcards == 0: @@ -35,7 +35,7 @@ def fnmatchcase(name, pat): def filter(names, pat): - # type: (Iterable[AnyStr], AnyStr) -> List[AnyStr] + # type: (Iterable[Text], Text) -> List[Text] return [n for n in names if fnmatch(n, pat)] diff --git a/tools/lint/lint.py b/tools/lint/lint.py index fc1a816f8abf85..24d190c23b37a9 100644 --- a/tools/lint/lint.py +++ b/tools/lint/lint.py @@ -3,6 +3,7 @@ import abc import argparse import ast +import io import json import logging import os @@ -21,7 +22,7 @@ from ..manifest.vcs import walk from ..manifest.sourcefile import SourceFile, js_meta_re, python_meta_re, space_chars, get_any_variants -from six import binary_type, iteritems, itervalues, with_metaclass +from six import binary_type, ensure_binary, ensure_text, iteritems, itervalues, with_metaclass from six.moves import range from six.moves.urllib.parse import urlsplit, urljoin @@ -39,7 +40,6 @@ from typing import Text from typing import Tuple from typing import Type - from typing import Union # The Ignorelist is a two level dictionary. The top level is indexed by # error names (e.g. 'TRAILING WHITESPACE'). 
Each of those then has a map of @@ -51,6 +51,7 @@ logger = None # type: Optional[logging.Logger] + def setup_logging(prefix=False): # type: (bool) -> None global logger @@ -89,24 +90,27 @@ def setup_logging(prefix=False): %s: %s""" + def all_filesystem_paths(repo_root, subdir=None): - # type: (str, Optional[str]) -> Iterable[str] - path_filter = PathFilter(repo_root, extras=[str(".git/")]) + # type: (Text, Optional[Text]) -> Iterable[Text] + path_filter = PathFilter(repo_root.encode("utf8"), + extras=[ensure_binary(".git/")]) if subdir: - expanded_path = subdir + expanded_path = subdir.encode("utf8") + subdir_str = expanded_path else: - expanded_path = repo_root + expanded_path = repo_root.encode("utf8") for dirpath, dirnames, filenames in path_filter(walk(expanded_path)): for filename, _ in filenames: path = os.path.join(dirpath, filename) if subdir: - path = os.path.join(subdir, path) + path = os.path.join(subdir_str, path) assert not os.path.isabs(path), path - yield path + yield ensure_text(path) def _all_files_equal(paths): - # type: (Iterable[str]) -> bool + # type: (Iterable[Text]) -> bool """ Checks all the paths are files that are byte-for-byte identical @@ -145,21 +149,21 @@ def _all_files_equal(paths): def check_path_length(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] if len(path) + 1 > 150: return [rules.PathLength.error(path, (path, len(path) + 1))] return [] def check_file_type(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] if os.path.islink(path): return [rules.FileType.error(path, (path, "symlink"))] return [] def check_worker_collision(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] endings = [(".any.html", ".any.js"), (".any.worker.html", ".any.js"), (".worker.html", ".worker.js")] @@ -170,7 +174,7 @@ def check_worker_collision(repo_root, path): def 
check_gitignore_file(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] if not path.endswith(".gitignore"): return [] @@ -190,7 +194,7 @@ def check_gitignore_file(repo_root, path): def check_ahem_copy(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] lpath = path.lower() if "ahem" in lpath and lpath.endswith(".ttf"): return [rules.AhemCopy.error(path)] @@ -198,7 +202,7 @@ def check_ahem_copy(repo_root, path): def check_git_ignore(repo_root, paths): - # type: (str, List[str]) -> List[rules.Error] + # type: (Text, List[Text]) -> List[rules.Error] errors = [] with tempfile.TemporaryFile('w+') as f: f.write('\n'.join(paths)) @@ -206,12 +210,13 @@ def check_git_ignore(repo_root, paths): try: matches = subprocess.check_output( ["git", "check-ignore", "--verbose", "--no-index", "--stdin"], stdin=f) - for match in matches.strip().split('\n'): - match_filter, path = match.split() - _, _, filter_string = match_filter.split(':') + for match in matches.strip().split(b'\n'): + match_filter, path_bytes = match.split() + _, _, filter_string = match_filter.split(b':') # If the matching filter reported by check-ignore is a special-case exception, # that's fine. Otherwise, it requires a new special-case exception. - if filter_string[0] != '!': + if filter_string[0:1] != b'!': + path = path_bytes.decode("utf8") errors.append(rules.IgnoredPath.error(path, (path,))) except subprocess.CalledProcessError: # Nonzero return code means that no match exists. 
@@ -225,7 +230,7 @@ def check_git_ignore(repo_root, paths): def check_css_globally_unique(repo_root, paths): - # type: (str, List[str]) -> List[rules.Error] + # type: (Text, List[Text]) -> List[rules.Error] """ Checks that CSS filenames are sufficiently unique @@ -243,32 +248,29 @@ def check_css_globally_unique(repo_root, paths): :returns: a list of errors found in ``paths`` """ - test_files = defaultdict(set) # type: Dict[Union[bytes, Text], Set[str]] - ref_files = defaultdict(set) # type: Dict[Union[bytes, Text], Set[str]] - support_files = defaultdict(set) # type: Dict[Union[bytes, Text], Set[str]] + test_files = defaultdict(set) # type: Dict[Text, Set[Text]] + ref_files = defaultdict(set) # type: Dict[Text, Set[Text]] + support_files = defaultdict(set) # type: Dict[Text, Set[Text]] for path in paths: if os.name == "nt": - if isinstance(path, binary_type): - path = path.replace(b"\\", b"/") - else: - path = path.replace(u"\\", u"/") + path = path.replace(u"\\", u"/") - if not path.startswith("css/"): + if not path.startswith(u"css/"): continue - source_file = SourceFile(repo_root, path, "/") + source_file = SourceFile(repo_root, path, u"/") if source_file.name_is_non_test: # If we're name_is_non_test for a reason apart from support, ignore it. # We care about support because of the requirement all support files in css/ to be in # a support directory; see the start of check_parsed. 
- offset = path.find("/support/") + offset = path.find(u"/support/") if offset == -1: continue parts = source_file.dir_path.split(os.path.sep) if (parts[0] in source_file.root_dir_non_test or - any(item in source_file.dir_non_test - {"support"} for item in parts) or + any(item in source_file.dir_non_test - {u"support"} for item in parts) or any(parts[:len(non_test_path)] == list(non_test_path) for non_test_path in source_file.dir_path_non_test)): continue @@ -277,11 +279,8 @@ def check_css_globally_unique(repo_root, paths): elif source_file.name_is_reference: ref_files[source_file.name].add(path) else: - test_name = source_file.name # type: Union[bytes, Text] - if isinstance(test_name, bytes): - test_name = test_name.replace(b'-manual', b'') - else: - test_name = test_name.replace(u'-manual', u'') + test_name = source_file.name # type: Text + test_name = test_name.replace(u'-manual', u'') test_files[test_name].add(path) errors = [] @@ -290,9 +289,9 @@ def check_css_globally_unique(repo_root, paths): if len(colliding) > 1: if not _all_files_equal([os.path.join(repo_root, x) for x in colliding]): # Only compute by_spec if there are prima-facie collisions because of cost - by_spec = defaultdict(set) # type: Dict[Text, Set[str]] + by_spec = defaultdict(set) # type: Dict[Text, Set[Text]] for path in colliding: - source_file = SourceFile(repo_root, path, "/") + source_file = SourceFile(repo_root, path, u"/") for link in source_file.spec_links: for r in (drafts_csswg_re, w3c_tr_re, w3c_dev_re): m = r.match(link) @@ -324,7 +323,7 @@ def check_css_globally_unique(repo_root, paths): def check_unique_testharness_basenames(repo_root, paths): - # type: (str, List[str]) -> List[rules.Error] + # type: (Text, List[Text]) -> List[rules.Error] """ Checks that all testharness files have unique basename paths. 
@@ -359,7 +358,7 @@ def check_unique_testharness_basenames(repo_root, paths): def parse_ignorelist(f): - # type: (IO[bytes]) -> Tuple[Ignorelist, Set[Text]] + # type: (IO[Text]) -> Tuple[Ignorelist, Set[Text]] """ Parse the ignorelist file given by `f`, and return the parsed structure. @@ -440,7 +439,7 @@ def filter_ignorelist_errors(data, errors): def check_regexp_line(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] errors = [] # type: List[rules.Error] applicable_regexps = [regexp for regexp in regexps if regexp.applies(path)] @@ -454,7 +453,7 @@ def check_regexp_line(repo_root, path, f): def check_parsed(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] source_file = SourceFile(repo_root, path, "/", contents=f.read()) errors = [] # type: List[rules.Error] @@ -630,7 +629,7 @@ def check(self, root): ast_checkers = [item() for item in [OpenModeCheck]] def check_python_ast(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] # *.quic.py are Python 3 only and cannot be parsed by Python 2. 
if not path.endswith(".py") or path.endswith(".quic.py"): return [] @@ -652,7 +651,7 @@ def check_python_ast(repo_root, path, f): def check_global_metadata(value): - # type: (str) -> Iterable[Tuple[Type[rules.Rule], Tuple[Any, ...]]] + # type: (bytes) -> Iterable[Tuple[Type[rules.Rule], Tuple[Any, ...]]] global_values = {item.strip().decode("utf8") for item in value.split(b",") if item.strip()} # TODO: this could check for duplicates and such @@ -662,7 +661,7 @@ def check_global_metadata(value): def check_script_metadata(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] if path.endswith((".worker.js", ".any.js")): meta_re = js_meta_re broken_metadata = broken_js_metadata @@ -713,7 +712,7 @@ def check_script_metadata(repo_root, path, f): def check_ahem_system_font(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] if not path.endswith((".html", ".htm", ".xht", ".xhtml")): return [] contents = f.read() @@ -724,7 +723,7 @@ def check_ahem_system_font(repo_root, path, f): def check_path(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] """ Runs lints that check the file path. @@ -740,7 +739,7 @@ def check_path(repo_root, path): def check_all_paths(repo_root, paths): - # type: (str, List[str]) -> List[rules.Error] + # type: (Text, List[Text]) -> List[rules.Error] """ Runs lints that check all paths globally. @@ -756,7 +755,7 @@ def check_all_paths(repo_root, paths): def check_file_contents(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] """ Runs lints that check the file contents. 
@@ -824,25 +823,23 @@ def output_error_count(error_count): def changed_files(wpt_root): - # type: (str) -> List[Text] + # type: (Text) -> List[Text] revish = testfiles.get_revish(revish=None) changed, _ = testfiles.files_changed(revish, None, include_uncommitted=True, include_new=True) return [os.path.relpath(item, wpt_root) for item in changed] def lint_paths(kwargs, wpt_root): - # type: (Dict[str, Any], str) -> List[str] - if kwargs.get(str("paths")): + # type: (Dict[Text, Any], Text) -> List[Text] + if kwargs.get("paths"): paths = [] - for path in kwargs.get(str("paths"), []): + for path in kwargs.get("paths", []): if os.path.isdir(path): path_dir = list(all_filesystem_paths(wpt_root, path)) paths.extend(path_dir) elif os.path.isfile(path): paths.append(os.path.relpath(os.path.abspath(path), wpt_root)) - - - elif kwargs[str("all")]: + elif kwargs["all"]: paths = list(all_filesystem_paths(wpt_root)) else: changed_paths = changed_files(wpt_root) @@ -852,7 +849,7 @@ def lint_paths(kwargs, wpt_root): if path == "lint.ignore" or path.startswith("tools/lint/"): force_all = True break - paths = (list(changed_paths) if not force_all # type: ignore + paths = (list(changed_paths) if not force_all else list(all_filesystem_paths(wpt_root))) return paths @@ -867,43 +864,47 @@ def create_parser(): help="Output machine-readable JSON format") parser.add_argument("--markdown", action="store_true", help="Output markdown") - parser.add_argument("--repo-root", help="The WPT directory. Use this " + parser.add_argument("--repo-root", type=ensure_text, + help="The WPT directory. 
Use this " "option if the lint script exists outside the repository") - parser.add_argument("--ignore-glob", help="Additional file glob to ignore.") + parser.add_argument("--ignore-glob", type=ensure_text, + help="Additional file glob to ignore.") parser.add_argument("--all", action="store_true", help="If no paths are passed, try to lint the whole " "working directory, not just files that changed") return parser -def main(**kwargs): +def main(**kwargs_str): # type: (**Any) -> int + kwargs = {ensure_text(key): value for key, value in iteritems(kwargs_str)} + assert logger is not None - if kwargs.get(str("json")) and kwargs.get(str("markdown")): + if kwargs.get("json") and kwargs.get("markdown"): logger.critical("Cannot specify --json and --markdown") sys.exit(2) - repo_root = kwargs.get(str('repo_root')) or localpaths.repo_root - output_format = {(True, False): str("json"), - (False, True): str("markdown"), - (False, False): str("normal")}[(kwargs.get(str("json"), False), - kwargs.get(str("markdown"), False))] + repo_root = kwargs.get('repo_root') or localpaths.repo_root + output_format = {(True, False): "json", + (False, True): "markdown", + (False, False): "normal"}[(kwargs.get("json", False), + kwargs.get("markdown", False))] if output_format == "markdown": setup_logging(True) paths = lint_paths(kwargs, repo_root) - ignore_glob = kwargs.get(str("ignore_glob")) or str() + ignore_glob = kwargs.get("ignore_glob") or "" - return lint(repo_root, paths, output_format, str(ignore_glob)) + return lint(repo_root, paths, output_format, ignore_glob) -def lint(repo_root, paths, output_format, ignore_glob=str()): - # type: (str, List[str], str, str) -> int +def lint(repo_root, paths, output_format, ignore_glob=""): + # type: (Text, List[Text], Text, Text) -> int error_count = defaultdict(int) # type: Dict[Text, int] last = None - with open(os.path.join(repo_root, "lint.ignore")) as f: + with io.open(os.path.join(repo_root, "lint.ignore"), "r") as f: ignorelist, skipped_files 
= parse_ignorelist(f) if ignore_glob: @@ -948,8 +949,8 @@ def process_errors(errors): last = process_errors(errors) or last if not os.path.isdir(abs_path): - with open(abs_path, 'rb') as f: - errors = check_file_contents(repo_root, path, f) + with io.open(abs_path, 'rb') as test_file: + errors = check_file_contents(repo_root, path, test_file) last = process_errors(errors) or last errors = check_all_paths(repo_root, paths) diff --git a/tools/lint/rules.py b/tools/lint/rules.py index 695f6cd4e5328c..6ffd749b9ad539 100644 --- a/tools/lint/rules.py +++ b/tools/lint/rules.py @@ -354,7 +354,7 @@ def __init__(self): self._re = re.compile(self.pattern) # type: Pattern[bytes] def applies(self, path): - # type: (str) -> bool + # type: (Text) -> bool return (self.file_extensions is None or os.path.splitext(path)[1] in self.file_extensions) diff --git a/tools/lint/tests/test_lint.py b/tools/lint/tests/test_lint.py index 10d9728b405751..3741f20097c69f 100644 --- a/tools/lint/tests/test_lint.py +++ b/tools/lint/tests/test_lint.py @@ -483,12 +483,12 @@ def test_ignore_glob(caplog): def test_all_filesystem_paths(): with mock.patch( 'tools.lint.lint.walk', - return_value=[('', - [('dir_a', None), ('dir_b', None)], - [('file_a', None), ('file_b', None)]), - ('dir_a', + return_value=[(b'', + [(b'dir_a', None), (b'dir_b', None)], + [(b'file_a', None), (b'file_b', None)]), + (b'dir_a', [], - [('file_c', None), ('file_d', None)])] + [(b'file_c', None), (b'file_d', None)])] ): got = list(lint_mod.all_filesystem_paths('.')) assert got == ['file_a', @@ -500,12 +500,12 @@ def test_all_filesystem_paths(): def test_filesystem_paths_subdir(): with mock.patch( 'tools.lint.lint.walk', - return_value=[('', - [('dir_a', None), ('dir_b', None)], - [('file_a', None), ('file_b', None)]), - ('dir_a', + return_value=[(b'', + [(b'dir_a', None), (b'dir_b', None)], + [(b'file_a', None), (b'file_b', None)]), + (b'dir_a', [], - [('file_c', None), ('file_d', None)])] + [(b'file_c', None), (b'file_d', 
None)])] ): got = list(lint_mod.all_filesystem_paths('.', 'dir')) assert got == [os.path.join('dir', 'file_a'), diff --git a/tools/localpaths.py b/tools/localpaths.py index ce3b41e300c9f1..e1aa644988b2be 100644 --- a/tools/localpaths.py +++ b/tools/localpaths.py @@ -1,8 +1,8 @@ import os import sys +from six import ensure_text here = os.path.abspath(os.path.split(__file__)[0]) -repo_root = os.path.abspath(os.path.join(here, os.pardir)) sys.path.insert(0, os.path.join(here)) sys.path.insert(0, os.path.join(here, "wptserve")) @@ -28,3 +28,5 @@ if sys.version_info[0] == 2: sys.path.insert(0, os.path.join(here, "third_party", "enum")) + +repo_root = ensure_text(os.path.abspath(os.path.join(here, os.pardir))) diff --git a/tools/manifest/XMLParser.py b/tools/manifest/XMLParser.py index 6f5ff4d35dab67..80aa3b5b920b67 100644 --- a/tools/manifest/XMLParser.py +++ b/tools/manifest/XMLParser.py @@ -6,6 +6,8 @@ from xml.parsers import expat import xml.etree.ElementTree as etree # noqa: N813 +from six import text_type + MYPY = False if MYPY: # MYPY is set to True when run under Mypy. @@ -24,9 +26,9 @@ def _wrap_error(e): err.position = e.lineno, e.offset raise err -_names = {} # type: Dict[str, str] +_names = {} # type: Dict[Text, Text] def _fixname(key): - # type: (str) -> str + # type: (Text) -> Text try: name = _names[key] except KeyError: @@ -54,7 +56,7 @@ class XMLParser(object): Python does, rather than just those supported by expat. """ def __init__(self, encoding=None): - # type: (Optional[str]) -> None + # type: (Optional[Text]) -> None self._parser = expat.ParserCreate(encoding, "}") self._target = etree.TreeBuilder() # parser settings @@ -63,6 +65,9 @@ def __init__(self, encoding=None): self._parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) # parser callbacks self._parser.XmlDeclHandler = self._xml_decl + # mypy generates a type error in py2 because it wants + # StartElementHandler to take str, List[str]. 
But the code + # seems to always pass in Text to this function self._parser.StartElementHandler = self._start self._parser.EndElementHandler = self._end self._parser.CharacterDataHandler = self._data @@ -70,32 +75,33 @@ def __init__(self, encoding=None): self._parser.SkippedEntityHandler = self._skipped # type: ignore # used for our horrible re-encoding hack self._fed_data = [] # type: Optional[List[bytes]] - self._read_encoding = None # type: Optional[str] + self._read_encoding = None # type: Optional[Text] def _xml_decl(self, version, encoding, standalone): - # type: (str, Optional[str], int) -> None + # type: (Text, Optional[Text], int) -> None self._read_encoding = encoding def _start(self, tag, attrib_in): - # type: (str, List[str]) -> etree.Element + # type: (Text, List[str]) -> etree.Element + assert isinstance(tag, text_type) self._fed_data = None tag = _fixname(tag) - attrib = OrderedDict() # type: Dict[Union[str, Text], Union[str, Text]] + attrib = OrderedDict() # type: Dict[Union[bytes, Text], Union[bytes, Text]] if attrib_in: for i in range(0, len(attrib_in), 2): attrib[_fixname(attrib_in[i])] = attrib_in[i+1] return self._target.start(tag, attrib) def _data(self, text): - # type: (str) -> None + # type: (Text) -> None self._target.data(text) def _end(self, tag): - # type: (str) -> etree.Element + # type: (Text) -> etree.Element return self._target.end(_fixname(tag)) def _external(self, context, base, system_id, public_id): - # type: (str, Optional[str], Optional[str], Optional[str]) -> bool + # type: (Text, Optional[Text], Optional[Text], Optional[Text]) -> bool if public_id in { "-//W3C//DTD XHTML 1.0 Transitional//EN", "-//W3C//DTD XHTML 1.1//EN", @@ -117,7 +123,7 @@ def _external(self, context, base, system_id, public_id): return True def _skipped(self, name, is_parameter_entity): - # type: (str, bool) -> None + # type: (Text, bool) -> None err = expat.error("undefined entity %s: line %d, column %d" % (name, self._parser.ErrorLineNumber, 
self._parser.ErrorColumnNumber)) @@ -127,7 +133,7 @@ def _skipped(self, name, is_parameter_entity): raise err def feed(self, data): - # type: (str) -> None + # type: (bytes) -> None if self._fed_data is not None: self._fed_data.append(data) try: diff --git a/tools/manifest/download.py b/tools/manifest/download.py index f8be03efb2750a..9d763181d82db7 100644 --- a/tools/manifest/download.py +++ b/tools/manifest/download.py @@ -35,12 +35,12 @@ def abs_path(path): - # type: (str) -> str + # type: (Text) -> Text return os.path.abspath(os.path.expanduser(path)) def should_download(manifest_path, rebuild_time=timedelta(days=5)): - # type: (str, timedelta) -> bool + # type: (Text, timedelta) -> bool if not os.path.exists(manifest_path): return True mtime = datetime.fromtimestamp(os.path.getmtime(manifest_path)) @@ -51,7 +51,7 @@ def should_download(manifest_path, rebuild_time=timedelta(days=5)): def merge_pr_tags(repo_root, max_count=50): - # type: (str, int) -> List[Text] + # type: (Text, int) -> List[Text] gitfunc = git(repo_root) tags = [] # type: List[Text] if gitfunc is None: @@ -64,7 +64,7 @@ def merge_pr_tags(repo_root, max_count=50): def score_name(name): - # type: (str) -> Optional[int] + # type: (Text) -> Optional[int] """Score how much we like each filename, lower wins, None rejects""" # Accept both ways of naming the manifest asset, even though @@ -111,7 +111,7 @@ def github_url(tags): def download_manifest( - manifest_path, # type: str + manifest_path, # type: Text tags_func, # type: Callable[[], List[Text]] url_func, # type: Callable[[List[Text]], Optional[List[Text]]] force=False # type: bool @@ -159,7 +159,8 @@ def download_manifest( fileobj = io.BytesIO(resp.read()) try: with gzip.GzipFile(fileobj=fileobj) as gzf: - decompressed = gzf.read() # type: ignore + data = read_gzf(gzf) # type: ignore + decompressed = data except IOError: logger.warning("Failed to decompress downloaded file") continue @@ -180,6 +181,12 @@ def download_manifest( return True +def 
read_gzf(gzf): # type: ignore + # This is working around a mypy problem in Python 2: + # "Call to untyped function "read" in typed context" + return gzf.read() + + def create_parser(): # type: () -> argparse.ArgumentParser parser = argparse.ArgumentParser() @@ -194,7 +201,7 @@ def create_parser(): def download_from_github(path, tests_root, force=False): - # type: (str, str, bool) -> bool + # type: (Text, Text, bool) -> bool return download_manifest(path, lambda: merge_pr_tags(tests_root), github_url, force=force) diff --git a/tools/manifest/item.py b/tools/manifest/item.py index e232925ed635c6..efb49d7f4ec9ef 100644 --- a/tools/manifest/item.py +++ b/tools/manifest/item.py @@ -198,7 +198,7 @@ def quic(self): @property def script_metadata(self): - # type: () -> Optional[Text] + # type: () -> Optional[List[Tuple[Text, Text]]] return self._extras.get("script_metadata") def to_json(self): diff --git a/tools/manifest/manifest.py b/tools/manifest/manifest.py index d5d8d53b08b937..449cd245acbc59 100644 --- a/tools/manifest/manifest.py +++ b/tools/manifest/manifest.py @@ -7,7 +7,6 @@ from multiprocessing import Pool, cpu_count from six import ( PY3, - binary_type, ensure_text, iteritems, itervalues, @@ -67,15 +66,15 @@ class InvalidCacheError(Exception): pass -item_classes = {"testharness": TestharnessTest, - "reftest": RefTest, - "print-reftest": PrintRefTest, - "crashtest": CrashTest, - "manual": ManualTest, - "wdspec": WebDriverSpecTest, - "conformancechecker": ConformanceCheckerTest, - "visual": VisualTest, - "support": SupportFile} # type: Dict[str, Type[ManifestItem]] +item_classes = {u"testharness": TestharnessTest, + u"reftest": RefTest, + u"print-reftest": PrintRefTest, + u"crashtest": CrashTest, + u"manual": ManualTest, + u"wdspec": WebDriverSpecTest, + u"conformancechecker": ConformanceCheckerTest, + u"visual": VisualTest, + u"support": SupportFile} # type: Dict[Text, Type[ManifestItem]] def compute_manifest_items(source_file): @@ -104,7 +103,7 @@ def 
__init__(self, manifest): self.json_obj = None # type: None def __setitem__(self, key, value): - # type: (str, TypeData) -> None + # type: (Text, TypeData) -> None if self.initialized: raise AttributeError dict.__setitem__(self, key, value) @@ -120,7 +119,7 @@ def paths(self): return rv def type_by_path(self): - # type: () -> Dict[Tuple[Text, ...], str] + # type: () -> Dict[Tuple[Text, ...], Text] rv = {} for item_type, item_data in iteritems(self): for item in item_data: @@ -128,26 +127,25 @@ def type_by_path(self): return rv - class Manifest(object): - def __init__(self, tests_root=None, url_base="/"): - # type: (Optional[str], Text) -> None + def __init__(self, tests_root, url_base="/"): + # type: (Text, Text) -> None assert url_base is not None self._data = ManifestData(self) # type: ManifestData - self.tests_root = tests_root # type: Optional[str] + self.tests_root = tests_root # type: Text self.url_base = url_base # type: Text def __iter__(self): - # type: () -> Iterator[Tuple[str, Text, Set[ManifestItem]]] + # type: () -> Iterator[Tuple[Text, Text, Set[ManifestItem]]] return self.itertypes() def itertypes(self, *types): - # type: (*str) -> Iterator[Tuple[str, Text, Set[ManifestItem]]] + # type: (*Text) -> Iterator[Tuple[Text, Text, Set[ManifestItem]]] for item_type in (types or sorted(self._data.keys())): for path in self._data[item_type]: - str_path = os.sep.join(path) + rel_path = os.sep.join(path) tests = self._data[item_type][path] - yield item_type, str_path, tests + yield item_type, rel_path, tests def iterpath(self, path): # type: (Text) -> Iterable[ManifestItem] @@ -171,7 +169,7 @@ def iterdir(self, dir_name): yield test def update(self, tree, parallel=True): - # type: (Iterable[Tuple[Union[SourceFile, bytes], bool]], bool) -> bool + # type: (Iterable[Tuple[Text, Optional[Text], bool]], bool) -> bool """Update the manifest given an iterable of items that make up the updated manifest. 
The iterable must either generate tuples of the form (SourceFile, True) for paths @@ -191,9 +189,7 @@ def update(self, tree, parallel=True): to_update = [] - for path_str, file_hash, updated in tree: - assert isinstance(path_str, (binary_type, text_type)) - path = ensure_text(path_str) + for path, file_hash, updated in tree: path_parts = tuple(path.split(os.path.sep)) is_new = path_parts not in remaining_manifest_paths @@ -208,6 +204,7 @@ def update(self, tree, parallel=True): if not updated: remaining_manifest_paths.remove(path_parts) else: + assert self.tests_root is not None source_file = SourceFile(self.tests_root, path, self.url_base, @@ -290,7 +287,7 @@ def to_json(self, caller_owns_obj=True): @classmethod def from_json(cls, tests_root, obj, types=None, callee_owns_obj=False): - # type: (str, Dict[Text, Any], Optional[Container[Text]], bool) -> Manifest + # type: (Text, Dict[Text, Any], Optional[Container[Text]], bool) -> Manifest """Load a manifest from a JSON object This loads a manifest for a given local test_root path from an @@ -327,19 +324,19 @@ def from_json(cls, tests_root, obj, types=None, callee_owns_obj=False): def load(tests_root, manifest, types=None): - # type: (str, Union[IO[bytes], str], Optional[Container[Text]]) -> Optional[Manifest] + # type: (Text, Union[IO[bytes], Text], Optional[Container[Text]]) -> Optional[Manifest] logger = get_logger() logger.warning("Prefer load_and_update instead") return _load(logger, tests_root, manifest, types) -__load_cache = {} # type: Dict[str, Manifest] +__load_cache = {} # type: Dict[Text, Manifest] def _load(logger, # type: Logger - tests_root, # type: str - manifest, # type: Union[IO[bytes], str] + tests_root, # type: Text + manifest, # type: Union[IO[bytes], Text] types=None, # type: Optional[Container[Text]] allow_cached=True # type: bool ): @@ -376,13 +373,13 @@ def _load(logger, # type: Logger return rv -def load_and_update(tests_root, # type: bytes - manifest_path, # type: bytes +def 
load_and_update(tests_root, # type: Union[Text, bytes] + manifest_path, # type: Union[Text, bytes] url_base, # type: Text update=True, # type: bool rebuild=False, # type: bool - metadata_path=None, # type: Optional[bytes] - cache_root=None, # type: Optional[bytes] + metadata_path=None, # type: Optional[Union[Text, bytes]] + cache_root=None, # type: Optional[Union[Text, bytes]] working_copy=True, # type: bool types=None, # type: Optional[Container[Text]] write_manifest=True, # type: bool @@ -390,6 +387,43 @@ def load_and_update(tests_root, # type: bytes parallel=True # type: bool ): # type: (...) -> Manifest + + # This function is now a facade for the purposes of type conversion, so that + # the external API can accept paths as text or (utf8) bytes, but internal + # functions always use Text. + + metadata_path_text = ensure_text(metadata_path) if metadata_path is not None else None + cache_root_text = ensure_text(cache_root) if cache_root is not None else None + + return _load_and_update(ensure_text(tests_root), + ensure_text(manifest_path), + url_base, + update=update, + rebuild=rebuild, + metadata_path=metadata_path_text, + cache_root=cache_root_text, + working_copy=working_copy, + types=types, + write_manifest=write_manifest, + allow_cached=allow_cached, + parallel=parallel) + + +def _load_and_update(tests_root, # type: Text + manifest_path, # type: Text + url_base, # type: Text + update=True, # type: bool + rebuild=False, # type: bool + metadata_path=None, # type: Optional[Text] + cache_root=None, # type: Optional[Text] + working_copy=True, # type: bool + types=None, # type: Optional[Container[Text]] + write_manifest=True, # type: bool + allow_cached=True, # type: bool + parallel=True # type: bool + ): + # type: (...) 
-> Manifest + logger = get_logger() manifest = None @@ -435,7 +469,7 @@ def load_and_update(tests_root, # type: bytes def write(manifest, manifest_path): - # type: (Manifest, bytes) -> None + # type: (Manifest, Text) -> None dir_name = os.path.dirname(manifest_path) if not os.path.exists(dir_name): os.makedirs(dir_name) diff --git a/tools/manifest/sourcefile.py b/tools/manifest/sourcefile.py index 524eb0bddb255f..09316bd052c066 100644 --- a/tools/manifest/sourcefile.py +++ b/tools/manifest/sourcefile.py @@ -2,7 +2,8 @@ import re import os from collections import deque -from six import binary_type, ensure_text, iteritems, text_type +from io import BytesIO +from six import binary_type, iteritems, text_type from six.moves.urllib.parse import urljoin from fnmatch import fnmatch @@ -10,7 +11,6 @@ if MYPY: # MYPY is set to True when run under Mypy. from typing import Any - from typing import AnyStr from typing import BinaryIO from typing import Callable from typing import Deque @@ -43,7 +43,7 @@ TestharnessTest, VisualTest, WebDriverSpecTest) -from .utils import ContextManagerBytesIO, cached_property +from .utils import cached_property wd_pattern = "*.py" js_meta_re = re.compile(br"//\s*META:\s*(\w*)=(.*)$") @@ -185,22 +185,22 @@ def _parse_xml(f): class SourceFile(object): - parsers = {"html":_parse_html, - "xhtml":_parse_xml, - "svg":_parse_xml} # type: Dict[Text, Callable[[BinaryIO], ElementTree.ElementTree]] + parsers = {u"html":_parse_html, + u"xhtml":_parse_xml, + u"svg":_parse_xml} # type: Dict[Text, Callable[[BinaryIO], ElementTree.ElementTree]] - root_dir_non_test = {"common"} + root_dir_non_test = {u"common"} - dir_non_test = {"resources", - "support", - "tools"} + dir_non_test = {u"resources", + u"support", + u"tools"} - dir_path_non_test = {("css21", "archive"), - ("css", "CSS2", "archive"), - ("css", "common")} # type: Set[Tuple[bytes, ...]] + dir_path_non_test = {(u"css21", u"archive"), + (u"css", u"CSS2", u"archive"), + (u"css", u"common")} # type: 
Set[Tuple[Text, ...]] - def __init__(self, tests_root, rel_path_str, url_base, hash=None, contents=None): - # type: (AnyStr, AnyStr, Text, Optional[Text], Optional[bytes]) -> None + def __init__(self, tests_root, rel_path, url_base, hash=None, contents=None): + # type: (Text, Text, Text, Optional[Text], Optional[bytes]) -> None """Object representing a file in a source tree. :param tests_root: Path to the root of the source tree @@ -209,7 +209,6 @@ def __init__(self, tests_root, rel_path_str, url_base, hash=None, contents=None) :param contents: Byte array of the contents of the file or ``None``. """ - rel_path = ensure_text(rel_path_str) assert not os.path.isabs(rel_path), rel_path if os.name == "nt": # do slash normalization on Windows @@ -224,7 +223,7 @@ def __init__(self, tests_root, rel_path_str, url_base, hash=None, contents=None) meta_flags = name.split(".")[1:] - self.tests_root = ensure_text(tests_root) # type: Text + self.tests_root = tests_root # type: Text self.rel_path = rel_path # type: Text self.dir_path = dir_path # type: Text self.filename = filename # type: Text @@ -249,7 +248,7 @@ def __getstate__(self): return rv def name_prefix(self, prefix): - # type: (bytes) -> bool + # type: (Text) -> bool """Check if the filename starts with a given prefix :param prefix: The prefix to check""" @@ -270,13 +269,8 @@ def open(self): * the contents specified in the constructor, if any; * a File object opened for reading the file contents. 
""" - if self.contents is not None: - wrapped = ContextManagerBytesIO(self.contents) - if MYPY: - file_obj = cast(BinaryIO, wrapped) - else: - file_obj = wrapped + file_obj = BytesIO(self.contents) # type: BinaryIO else: file_obj = open(self.path, 'rb') return file_obj @@ -337,11 +331,11 @@ def name_is_non_test(self): """Check if the file name matches the conditions for the file to be a non-test file""" return (self.is_dir() or - self.name_prefix("MANIFEST") or - self.filename == "META.yml" or - self.filename.startswith(".") or - self.filename.endswith(".headers") or - self.filename.endswith(".ini") or + self.name_prefix(u"MANIFEST") or + self.filename == u"META.yml" or + self.filename.startswith(u".") or + self.filename.endswith(u".headers") or + self.filename.endswith(u".ini") or self.in_non_test_dir()) @property @@ -441,14 +435,14 @@ def markup_type(self): if not ext: return None - if ext[0] == ".": + if ext[0] == u".": ext = ext[1:] - if ext in ["html", "htm"]: - return "html" - if ext in ["xhtml", "xht", "xml"]: - return "xhtml" - if ext == "svg": - return "svg" + if ext in [u"html", u"htm"]: + return u"html" + if ext in [u"xhtml", u"xht", u"xml"]: + return u"xhtml" + if ext == u"svg": + return u"svg" return None @cached_property diff --git a/tools/manifest/utils.py b/tools/manifest/utils.py index aefc2c94f40e1a..36c1a983101a8e 100644 --- a/tools/manifest/utils.py +++ b/tools/manifest/utils.py @@ -2,16 +2,12 @@ import platform import subprocess -from six import BytesIO - MYPY = False if MYPY: # MYPY is set to True when run under Mypy. 
from typing import Text from typing import Callable - from typing import AnyStr from typing import Any - from typing import BinaryIO from typing import Generic from typing import TypeVar from typing import Optional @@ -22,72 +18,62 @@ T = object() Generic[T] = object + def rel_path_to_url(rel_path, url_base="/"): - # type: (bytes, Text) -> Text + # type: (Text, Text) -> Text assert not os.path.isabs(rel_path), rel_path - if url_base[0] != "/": - url_base = "/" + url_base - if url_base[-1] != "/": - url_base += "/" - return url_base + rel_path.replace(os.sep, "/") + if url_base[0] != u"/": + url_base = u"/" + url_base + if url_base[-1] != u"/": + url_base += u"/" + return url_base + rel_path.replace(os.sep, u"/") def from_os_path(path): - # type: (AnyStr) -> AnyStr - assert os.path.sep == "/" or platform.system() == "Windows" - if "/" == os.path.sep: + # type: (Text) -> Text + assert os.path.sep == u"/" or platform.system() == "Windows" + if u"/" == os.path.sep: rv = path else: - rv = path.replace(os.path.sep, "/") - if "\\" in rv: + rv = path.replace(os.path.sep, u"/") + if u"\\" in rv: raise ValueError("path contains \\ when separator is %s" % os.path.sep) return rv def to_os_path(path): - # type: (AnyStr) -> AnyStr - assert os.path.sep == "/" or platform.system() == "Windows" - if "\\" in path: + # type: (Text) -> Text + assert os.path.sep == u"/" or platform.system() == "Windows" + if u"\\" in path: raise ValueError("normalised path contains \\") - if "/" == os.path.sep: + if u"/" == os.path.sep: return path - return path.replace("/", os.path.sep) + return path.replace(u"/", os.path.sep) def git(path): - # type: (bytes) -> Optional[Callable[..., Text]] + # type: (Text) -> Optional[Callable[..., Text]] def gitfunc(cmd, *args): - # type: (bytes, *bytes) -> Text - full_cmd = ["git", cmd] + list(args) + # type: (Text, *Text) -> Text + full_cmd = [u"git", cmd] + list(args) try: return subprocess.check_output(full_cmd, cwd=path, 
stderr=subprocess.STDOUT).decode('utf8') except Exception as e: if platform.uname()[0] == "Windows" and isinstance(e, WindowsError): - full_cmd[0] = "git.bat" + full_cmd[0] = u"git.bat" return subprocess.check_output(full_cmd, cwd=path, stderr=subprocess.STDOUT).decode('utf8') else: raise try: # this needs to be a command that fails if we aren't in a git repo - gitfunc("rev-parse", "--show-toplevel") + gitfunc(u"rev-parse", u"--show-toplevel") except (subprocess.CalledProcessError, OSError): return None else: return gitfunc -class ContextManagerBytesIO(BytesIO): # type: ignore - def __enter__(self): - # type: () -> BinaryIO - return self # type: ignore - - def __exit__(self, *args, **kwargs): - # type: (*Any, **Any) -> bool - self.close() - return True - - class cached_property(Generic[T]): def __init__(self, func): # type: (Callable[[Any], T]) -> None diff --git a/tools/manifest/vcs.py b/tools/manifest/vcs.py index d36fe570f07347..80c0512807a959 100644 --- a/tools/manifest/vcs.py +++ b/tools/manifest/vcs.py @@ -7,7 +7,6 @@ from six import with_metaclass, PY2 -from .sourcefile import SourceFile from .utils import git try: @@ -20,7 +19,7 @@ MYPY = False if MYPY: # MYPY is set to True when run under Mypy. 
- from typing import Dict, Optional, List, Set, Text, Iterable, Any, Tuple, Union, Iterator + from typing import Dict, Optional, List, Set, Text, Iterable, Any, Tuple, Iterator from .manifest import Manifest # cyclic import under MYPY guard if PY2: stat_result = Any @@ -30,10 +29,10 @@ def get_tree(tests_root, manifest, manifest_path, cache_root, working_copy=True, rebuild=False): - # type: (bytes, Manifest, Optional[bytes], Optional[bytes], bool, bool) -> FileSystem + # type: (Text, Manifest, Optional[Text], Optional[Text], bool, bool) -> FileSystem tree = None if cache_root is None: - cache_root = os.path.join(tests_root, ".wptcache") + cache_root = os.path.join(tests_root, u".wptcache") if not os.path.exists(cache_root): try: os.makedirs(cache_root) @@ -54,7 +53,7 @@ def get_tree(tests_root, manifest, manifest_path, cache_root, class GitHasher(object): def __init__(self, path): - # type: (bytes) -> None + # type: (Text) -> None self.git = git(path) def _local_changes(self): @@ -90,32 +89,34 @@ def hash_cache(self): class FileSystem(object): - def __init__(self, root, url_base, cache_path, manifest_path=None, rebuild=False): - # type: (bytes, Text, Optional[bytes], Optional[bytes], bool) -> None - self.root = os.path.abspath(root) + def __init__(self, tests_root, url_base, cache_path, manifest_path=None, rebuild=False): + # type: (Text, Text, Optional[Text], Optional[Text], bool) -> None + self.tests_root = tests_root self.url_base = url_base self.ignore_cache = None self.mtime_cache = None + tests_root_bytes = tests_root.encode("utf8") if cache_path is not None: if manifest_path is not None: - self.mtime_cache = MtimeCache(cache_path, root, manifest_path, rebuild) - if gitignore.has_ignore(root): - self.ignore_cache = GitIgnoreCache(cache_path, root, rebuild) - self.path_filter = gitignore.PathFilter(self.root, - extras=[".git/"], + self.mtime_cache = MtimeCache(cache_path, tests_root, manifest_path, rebuild) + if gitignore.has_ignore(tests_root_bytes): + 
self.ignore_cache = GitIgnoreCache(cache_path, tests_root, rebuild) + self.path_filter = gitignore.PathFilter(tests_root_bytes, + extras=[b".git/"], cache=self.ignore_cache) - git = GitHasher(root) + git = GitHasher(tests_root) if git is not None: self.hash_cache = git.hash_cache() else: self.hash_cache = {} def __iter__(self): - # type: () -> Iterator[Tuple[bytes, Optional[bytes], bool]] + # type: () -> Iterator[Tuple[Text, Optional[Text], bool]] mtime_cache = self.mtime_cache - for dirpath, dirnames, filenames in self.path_filter(walk(self.root)): + for dirpath, dirnames, filenames in self.path_filter( + walk(self.tests_root.encode("utf8"))): for filename, path_stat in filenames: - path = os.path.join(dirpath, filename) + path = os.path.join(dirpath, filename).decode("utf8") if mtime_cache is None or mtime_cache.updated(path, path_stat): file_hash = self.hash_cache.get(path, None) yield path, file_hash, True @@ -131,7 +132,7 @@ def dump_caches(self): class CacheFile(with_metaclass(abc.ABCMeta)): def __init__(self, cache_root, tests_root, rebuild=False): - # type: (bytes, bytes, bool) -> None + # type: (Text, Text, bool) -> None self.tests_root = tests_root if not os.path.exists(cache_root): os.makedirs(cache_root) @@ -141,7 +142,7 @@ def __init__(self, cache_root, tests_root, rebuild=False): @abc.abstractproperty def file_name(self): - # type: () -> bytes + # type: () -> Text pass def dump(self): @@ -152,8 +153,8 @@ def dump(self): json.dump(self.data, f, indent=1) def load(self, rebuild=False): - # type: (bool) -> Dict[Any, Any] - data = {} # type: Dict[Any, Any] + # type: (bool) -> Dict[Text, Any] + data = {} # type: Dict[Text, Any] try: if not rebuild: with open(self.path, 'r') as f: @@ -167,22 +168,22 @@ def load(self, rebuild=False): return data def check_valid(self, data): - # type: (Dict[Any, Any]) -> Dict[Any, Any] + # type: (Dict[Text, Any]) -> Dict[Text, Any] """Check if the cached data is valid and return an updated copy of the cache containing only 
data that can be used.""" return data class MtimeCache(CacheFile): - file_name = "mtime.json" + file_name = u"mtime.json" def __init__(self, cache_root, tests_root, manifest_path, rebuild=False): - # type: (bytes, bytes, bytes, bool) -> None + # type: (Text, Text, Text, bool) -> None self.manifest_path = manifest_path super(MtimeCache, self).__init__(cache_root, tests_root, rebuild) def updated(self, rel_path, stat): - # type: (bytes, stat_result) -> bool + # type: (Text, stat_result) -> bool """Return a boolean indicating whether the file changed since the cache was last updated. This implicitly updates the cache with the new mtime data.""" @@ -195,12 +196,12 @@ def updated(self, rel_path, stat): def check_valid(self, data): # type: (Dict[Any, Any]) -> Dict[Any, Any] - if data.get("/tests_root") != self.tests_root: + if data.get(u"/tests_root") != self.tests_root: self.modified = True else: if self.manifest_path is not None and os.path.exists(self.manifest_path): mtime = os.path.getmtime(self.manifest_path) - if data.get("/manifest_path") != [self.manifest_path, mtime]: + if data.get(u"/manifest_path") != [self.manifest_path, mtime]: self.modified = True else: self.modified = True @@ -228,10 +229,10 @@ def check_valid(self, data): # type: (Dict[Any, Any]) -> Dict[Any, Any] ignore_path = os.path.join(self.tests_root, ".gitignore") mtime = os.path.getmtime(ignore_path) - if data.get("/gitignore_file") != [ignore_path, mtime]: + if data.get(u"/gitignore_file") != [ignore_path, mtime]: self.modified = True data = {} - data["/gitignore_file"] = [ignore_path, mtime] + data[u"/gitignore_file"] = [ignore_path, mtime] return data def __contains__(self, key): @@ -239,23 +240,23 @@ def __contains__(self, key): return key in self.data def __getitem__(self, key): - # type: (bytes) -> bool + # type: (Text) -> bool v = self.data[key] assert isinstance(v, bool) return v def __setitem__(self, key, value): - # type: (bytes, bool) -> None + # type: (Text, bool) -> None if 
self.data.get(key) != value: self.modified = True self.data[key] = value def __delitem__(self, key): - # type: (bytes) -> None + # type: (Text) -> None del self.data[key] def __iter__(self): - # type: () -> Iterator[bytes] + # type: () -> Iterator[Text] return iter(self.data) def __len__(self): @@ -286,7 +287,7 @@ def walk(root): relpath = os.path.relpath root = os.path.abspath(root) - stack = deque([(root, "")]) + stack = deque([(root, b"")]) while stack: dir_path, rel_path = stack.popleft() diff --git a/tools/tox.ini b/tools/tox.ini index bbec1afbb6dd2f..45358c081efbb0 100644 --- a/tools/tox.ini +++ b/tools/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27,py35,py36,py37,py38,{py27,py35,py36,py37,py38}-flake8,{py35,py36,py37,py38}-mypy +envlist = py27,py35,py36,py37,py38,{py27,py35,py36,py37,py38}-flake8,py38-mypy2,{py35,py38}-mypy3 skipsdist=True skip_missing_interpreters = False @@ -42,26 +42,20 @@ commands = flake8 --append-config={toxinidir}/py3-flake8.ini {posargs} deps = -rrequirements_flake8.txt commands = flake8 --append-config={toxinidir}/py3-flake8.ini {posargs} -[testenv:py35-mypy] +[testenv:py38-mypy2] deps = -rrequirements_mypy.txt changedir = {toxinidir}/.. commands = mypy --config-file={toxinidir}/mypy.ini --no-incremental --py2 -p tools.manifest -p tools.lint -p tools.gitignore -[testenv:py36-mypy] +[testenv:py35-mypy3] deps = -rrequirements_mypy.txt changedir = {toxinidir}/.. commands = - mypy --config-file={toxinidir}/mypy.ini --no-incremental --py2 -p tools.manifest -p tools.lint -p tools.gitignore + mypy --config-file={toxinidir}/mypy.ini --no-incremental -p tools.manifest -p tools.lint -p tools.gitignore -[testenv:py37-mypy] +[testenv:py38-mypy3] deps = -rrequirements_mypy.txt changedir = {toxinidir}/.. commands = - mypy --config-file={toxinidir}/mypy.ini --no-incremental --py2 -p tools.manifest -p tools.lint -p tools.gitignore - -[testenv:py38-mypy] -deps = -rrequirements_mypy.txt -changedir = {toxinidir}/.. 
-commands = - mypy --config-file={toxinidir}/mypy.ini --no-incremental --py2 -p tools.manifest -p tools.lint -p tools.gitignore + mypy --config-file={toxinidir}/mypy.ini --no-incremental -p tools.manifest -p tools.lint -p tools.gitignore diff --git a/tools/wpt/revlist.py b/tools/wpt/revlist.py index 1893fdefa8346e..bd85612e2c2a7c 100644 --- a/tools/wpt/revlist.py +++ b/tools/wpt/revlist.py @@ -33,19 +33,22 @@ def parse_epoch(string): def get_tagged_revisions(pattern): - # type: (bytes) -> List[..., Dict] + # type: (Text) -> List[..., Dict] ''' Returns the tagged revisions indexed by the committer date. ''' git = get_git_cmd(wpt_root) args = [ pattern, - '--sort=-committerdate', - '--format=%(refname:lstrip=2) %(objectname) %(committerdate:raw)', - '--count=100000' + u'--sort=-committerdate', + u'--format=%(refname:lstrip=2) %(objectname) %(committerdate:raw)', + u'--count=100000' ] - for line in git("for-each-ref", *args).splitlines(): - tag, commit, date, _ = line.split(" ") + ref_list = git(u"for-each-ref", *args) + for line in ref_list.splitlines(): + if not line: + continue + tag, commit, date, _ = line.split(u" ") date = int(date) yield tag, commit, date @@ -81,7 +84,7 @@ def get_epoch_revisions(epoch, until, max_count): # Expected result: N,M,K,J,H,G,F,C,A cutoff_date = calculate_cutoff_date(until, epoch, epoch_offset) - for _, commit, date in get_tagged_revisions("refs/tags/merge_pr_*"): + for _, commit, date in get_tagged_revisions(u"refs/tags/merge_pr_*"): if count >= max_count: return if date < cutoff_date: diff --git a/tools/wpt/testfiles.py b/tools/wpt/testfiles.py index 47a02844edc337..c990aa21a65f6f 100644 --- a/tools/wpt/testfiles.py +++ b/tools/wpt/testfiles.py @@ -2,15 +2,14 @@ import logging import os import re -import subprocess import sys -import six from collections import OrderedDict -from six import iteritems +from six import ensure_text, ensure_str, iteritems try: from ..manifest import manifest + from ..manifest.utils import git as 
get_git_cmd except ValueError: # if we're not within the tools package, the above is an import from above # the top-level which raises ValueError, so reimport it with an absolute @@ -20,12 +19,12 @@ # paths set up correctly to handle both and MYPY has no knowledge of our # sys.path magic from manifest import manifest # type: ignore + from manifest.utils import git as get_git_cmd # type: ignore MYPY = False if MYPY: # MYPY is set to True when run under Mypy. from typing import Any - from typing import Callable from typing import Dict from typing import Iterable from typing import List @@ -35,30 +34,13 @@ from typing import Set from typing import Text from typing import Tuple - from typing import Union -here = os.path.dirname(__file__) +here = ensure_text(os.path.dirname(__file__)) wpt_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir)) logger = logging.getLogger() -def get_git_cmd(repo_path): - # type: (bytes) -> Callable[..., Text] - """Create a function for invoking git commands as a subprocess.""" - def git(cmd, *args): - # type: (Text, *Union[bytes, Text]) -> Text - full_cmd = [u"git", cmd] + list(item.decode("utf8") if isinstance(item, bytes) else item for item in args) # type: List[Text] - try: - logger.debug(" ".join(full_cmd)) - return subprocess.check_output(full_cmd, cwd=repo_path).decode("utf8").strip() - except subprocess.CalledProcessError as e: - logger.critical("Git command exited with status %i" % e.returncode) - logger.critical(e.output) - sys.exit(1) - return git - - def display_branch_point(): # type: () -> None print(branch_point()) @@ -66,6 +48,10 @@ def display_branch_point(): def branch_point(): # type: () -> Optional[Text] + git = get_git_cmd(wpt_root) + if git is None: + raise Exception("git not found") + if (os.environ.get("GITHUB_PULL_REQUEST", "false") == "false" and os.environ.get("GITHUB_BRANCH") == "master"): # For builds on the master branch just return the HEAD commit @@ -85,7 +71,7 @@ def branch_point(): # get 
everything in refs/heads and refs/remotes that doesn't include HEAD not_heads = [item for item in git("rev-parse", "--not", "--branches", "--remotes").split("\n") - if item != "^%s" % head] + if item and item != "^%s" % head] # get all commits on HEAD but not reachable from anything in not_heads commits = git("rev-list", "--topo-order", "--parents", "HEAD", *not_heads) @@ -129,28 +115,33 @@ def branch_point(): logger.debug("Using first commit on another branch as the branch point") logger.debug("Branch point from master: %s" % branch_point) + if branch_point: + branch_point = branch_point.strip() return branch_point def compile_ignore_rule(rule): - # type: (str) -> Pattern[str] - rule = rule.replace(os.path.sep, "/") - parts = rule.split("/") + # type: (Text) -> Pattern[Text] + rule = rule.replace(ensure_text(os.path.sep), u"/") + parts = rule.split(u"/") re_parts = [] for part in parts: - if part.endswith("**"): - re_parts.append(re.escape(part[:-2]) + ".*") - elif part.endswith("*"): - re_parts.append(re.escape(part[:-1]) + "[^/]*") + if part.endswith(u"**"): + re_parts.append(re.escape(part[:-2]) + u".*") + elif part.endswith(u"*"): + re_parts.append(re.escape(part[:-1]) + u"[^/]*") else: re_parts.append(re.escape(part)) - return re.compile("^%s$" % "/".join(re_parts)) + return re.compile(u"^%s$" % u"/".join(re_parts)) def repo_files_changed(revish, include_uncommitted=False, include_new=False): - # type: (str, bool, bool) -> Set[Text] + # type: (Text, bool, bool) -> Set[Text] git = get_git_cmd(wpt_root) - files_list = git("diff", "--name-only", "-z", revish).split("\0") + if git is None: + raise Exception("git not found") + + files_list = git("diff", "--name-only", "-z", revish).split(u"\0") assert not files_list[-1] files = set(files_list[:-1]) @@ -174,7 +165,7 @@ def repo_files_changed(revish, include_uncommitted=False, include_new=False): def exclude_ignored(files, ignore_rules): - # type: (Iterable[Text], Optional[Sequence[str]]) -> Tuple[List[Text], 
List[Text]] + # type: (Iterable[Text], Optional[Sequence[Text]]) -> Tuple[List[Text], List[Text]] if ignore_rules is None: ignore_rules = [] compiled_ignore_rules = [compile_ignore_rule(item) for item in ignore_rules] @@ -194,8 +185,8 @@ def exclude_ignored(files, ignore_rules): return changed, ignored -def files_changed(revish, # type: str - ignore_rules=None, # type: Optional[Sequence[str]] +def files_changed(revish, # type: Text + ignore_rules=None, # type: Optional[Sequence[Text]] include_uncommitted=False, # type: bool include_new=False # type: bool ): @@ -216,29 +207,29 @@ def files_changed(revish, # type: str def _in_repo_root(full_path): - # type: (Union[bytes, Text]) -> bool + # type: (Text) -> bool rel_path = os.path.relpath(full_path, wpt_root) path_components = rel_path.split(os.sep) return len(path_components) < 2 def load_manifest(manifest_path=None, manifest_update=True): - # type: (Optional[str], bool) -> manifest.Manifest + # type: (Optional[Text], bool) -> manifest.Manifest if manifest_path is None: - manifest_path = os.path.join(wpt_root, "MANIFEST.json") + manifest_path = os.path.join(wpt_root, u"MANIFEST.json") return manifest.load_and_update(wpt_root, manifest_path, "/", update=manifest_update) def affected_testfiles(files_changed, # type: Iterable[Text] - skip_dirs=None, # type: Optional[Set[str]] - manifest_path=None, # type: Optional[str] + skip_dirs=None, # type: Optional[Set[Text]] + manifest_path=None, # type: Optional[Text] manifest_update=True # type: bool ): - # type: (...) -> Tuple[Set[Text], Set[str]] + # type: (...) -> Tuple[Set[Text], Set[Text]] """Determine and return list of test files that reference changed files.""" if skip_dirs is None: - skip_dirs = {"conformance-checkers", "docs", "tools"} + skip_dirs = {u"conformance-checkers", u"docs", u"tools"} affected_testfiles = set() # Exclude files that are in the repo root, because # they are not part of any test. 
@@ -281,7 +272,7 @@ def affected_testfiles(files_changed, # type: Iterable[Text] for interface in interfaces_changed] def affected_by_wdspec(test): - # type: (str) -> bool + # type: (Text) -> bool affected = False if test in wdspec_test_files: for support_full_path, _ in nontest_changed_paths: @@ -297,7 +288,7 @@ def affected_by_wdspec(test): return affected def affected_by_interfaces(file_contents): - # type: (Union[bytes, Text]) -> bool + # type: (Text) -> bool if len(interfaces_changed_names) > 0: if 'idlharness.js' in file_contents: for interface in interfaces_changed_names: @@ -375,27 +366,26 @@ def get_parser_affected(): def get_revish(**kwargs): - # type: (**Any) -> bytes + # type: (**Any) -> Text revish = kwargs.get("revish") if revish is None: - revish = "%s..HEAD" % branch_point() - if isinstance(revish, six.text_type): - revish = revish.encode("utf8") - assert isinstance(revish, six.binary_type) - return revish + revish = u"%s..HEAD" % branch_point() + return ensure_text(revish).strip() def run_changed_files(**kwargs): # type: (**Any) -> None revish = get_revish(**kwargs) - changed, _ = files_changed(revish, kwargs["ignore_rules"], + changed, _ = files_changed(revish, + kwargs["ignore_rules"], include_uncommitted=kwargs["modified"], include_new=kwargs["new"]) - separator = "\0" if kwargs["null"] else "\n" + separator = u"\0" if kwargs["null"] else u"\n" for item in sorted(changed): - sys.stdout.write(os.path.relpath(six.ensure_str(item), wpt_root) + separator) + line = os.path.relpath(item, wpt_root) + separator + sys.stdout.write(ensure_str(line)) def run_tests_affected(**kwargs): diff --git a/tools/wpt/tests/test_testfiles.py b/tools/wpt/tests/test_testfiles.py index 9fd6c6f9b85153..81f528457c6d68 100644 --- a/tools/wpt/tests/test_testfiles.py +++ b/tools/wpt/tests/test_testfiles.py @@ -6,13 +6,13 @@ def test_getrevish_kwarg(): - assert testfiles.get_revish(revish=u"abcdef") == b"abcdef" - assert testfiles.get_revish(revish=b"abcdef") == b"abcdef" + 
assert testfiles.get_revish(revish=u"abcdef") == u"abcdef" + assert testfiles.get_revish(revish=b"abcdef") == u"abcdef" def test_getrevish_implicit(): with patch("tools.wpt.testfiles.branch_point", return_value=u"base"): - assert testfiles.get_revish() == b"base..HEAD" + assert testfiles.get_revish() == u"base..HEAD" def test_affected_testfiles(): diff --git a/tools/wpt/utils.py b/tools/wpt/utils.py index 9a6c0646712064..18551481ab8cae 100644 --- a/tools/wpt/utils.py +++ b/tools/wpt/utils.py @@ -5,15 +5,17 @@ import zipfile from io import BytesIO -try: - from typing import Any, Callable -except ImportError: - pass +MYPY = False +if MYPY: + from typing import Any + from typing import Callable + from typing import Dict logger = logging.getLogger(__name__) class Kwargs(dict): + # type: Dict[Any, Any] def set_if_none(self, name, # type: str value, # type: Any diff --git a/tools/wptrunner/wptrunner/tests/test_update.py b/tools/wptrunner/wptrunner/tests/test_update.py index 6dc04cedf34c7a..a1ba8de2878064 100644 --- a/tools/wptrunner/wptrunner/tests/test_update.py +++ b/tools/wptrunner/wptrunner/tests/test_update.py @@ -29,6 +29,19 @@ def SourceFileWithTest(path, hash, cls, *args): return s +def tree_and_sourcefile_mocks(source_files): + paths_dict = {} + tree = [] + for source_file, file_hash, updated in source_files: + paths_dict[source_file.rel_path] = source_file + tree.append([source_file.rel_path, file_hash, updated]) + + def MockSourceFile(tests_root, path, url_base, file_hash): + return paths_dict[path] + + return tree, MockSourceFile + + item_classes = {"testharness": manifest_item.TestharnessTest, "reftest": manifest_item.RefTest, "manual": manifest_item.ManualTest, @@ -129,9 +142,11 @@ def create_test_manifest(tests, url_base="/"): source_files = [] for i, (test, _, test_type, _) in enumerate(tests): if test_type: - source_files.append((SourceFileWithTest(test, str(i) * 40, item_classes[test_type]), True)) - m = manifest.Manifest() - m.update(source_files) + 
source_files.append(SourceFileWithTest(test, str(i) * 40, item_classes[test_type])) + m = manifest.Manifest("") + tree, sourcefile_mock = tree_and_sourcefile_mocks((item, None, True) for item in source_files) + with mock.patch("manifest.manifest.SourceFile", side_effect=sourcefile_mock): + m.update(tree) return m diff --git a/tools/wptrunner/wptrunner/tests/test_wpttest.py b/tools/wptrunner/wptrunner/tests/test_wpttest.py index dbccd90e339580..4eee9ccb1b6993 100644 --- a/tools/wptrunner/wptrunner/tests/test_wpttest.py +++ b/tools/wptrunner/wptrunner/tests/test_wpttest.py @@ -1,11 +1,13 @@ +import mock from io import BytesIO -from mock import Mock from manifest import manifest as wptmanifest from manifest.item import TestharnessTest, RefTest from manifest.utils import to_os_path +from . test_update import tree_and_sourcefile_mocks from .. import manifestexpected, wpttest + dir_ini_0 = b"""\ prefs: [a:b] """ @@ -83,7 +85,7 @@ def make_mock_manifest(*items): - rv = Mock(tests_root="/foobar") + rv = mock.Mock(tests_root="/foobar") tests = [] rv.__iter__ = lambda self: iter(tests) rv.__getitem__ = lambda self, k: tests[k] @@ -204,13 +206,15 @@ def test_metadata_fuzzy(): references=[["/a/fuzzy-ref.html", "=="]], fuzzy=[[["/a/fuzzy.html", '/a/fuzzy-ref.html', '=='], [[2, 3], [10, 15]]]]) - s = Mock(rel_path="a/fuzzy.html", rel_path_parts=("a", "fuzzy.html"), hash="0"*40) - s.manifest_items = Mock(return_value=(item.item_type, [item])) + s = mock.Mock(rel_path="a/fuzzy.html", rel_path_parts=("a", "fuzzy.html"), hash="0"*40) + s.manifest_items = mock.Mock(return_value=(item.item_type, [item])) - manifest = wptmanifest.Manifest() + manifest = wptmanifest.Manifest("") - assert manifest.update([(s, True)]) is True + tree, sourcefile_mock = tree_and_sourcefile_mocks([(s, None, True)]) + with mock.patch("manifest.manifest.SourceFile", side_effect=sourcefile_mock): + assert manifest.update(tree) is True test_metadata = manifestexpected.static.compile(BytesIO(test_fuzzy), {},