From 6539f1e3433e77e23a33e293007f2da993c96abc Mon Sep 17 00:00:00 2001 From: James Graham Date: Thu, 21 May 2020 19:50:11 +0100 Subject: [PATCH] Fix a bunch of Python 3 issues in the manifest code The goal here is to get mypy passing in Python 3; prior to this patch there were a lot of cases where Python 3 behaviour was broken but not picked up by the tests (e.g. because it only happened when updating the manifest or with certain caches or similar). Many of these issues were seen by mypy, but that had been disabled for Py3 specifically to avoid having to fix all the issues at the time the annotations were added. But now we do need that support. The general approach here is to use the same types for Py 2 and Py 3. In particular we use Text where possible (with a couple of exceptions where it was easier not to), rather than bytes. The native str type is avoided wherever possible since this introduces difficult to debug differences between Python versions. The preference for Text includes paths, which means we no longer support running in non-unicode directories (in practice it is likely to be broken if the path isn't UTF-8 compatible on Unix or UTF-16 compatible on Windows). 
--- tools/gitignore/gitignore.py | 136 ++++++++-------- tools/gitignore/tests/test_gitignore.py | 93 ++++++----- tools/lint/fnmatch.py | 8 +- tools/lint/lint.py | 147 +++++++++--------- tools/lint/rules.py | 2 +- tools/lint/tests/test_lint.py | 20 +-- tools/localpaths.py | 4 +- tools/manifest/XMLParser.py | 30 ++-- tools/manifest/download.py | 21 ++- tools/manifest/item.py | 2 +- tools/manifest/manifest.py | 102 ++++++++---- tools/manifest/sourcefile.py | 66 ++++---- tools/manifest/utils.py | 58 +++---- tools/manifest/vcs.py | 69 ++++---- tools/tox.ini | 18 +-- tools/wpt/revlist.py | 17 +- tools/wpt/testfiles.py | 98 ++++++------ tools/wpt/tests/test_testfiles.py | 6 +- tools/wpt/utils.py | 10 +- .../wptrunner/wptrunner/tests/test_update.py | 21 ++- .../wptrunner/wptrunner/tests/test_wpttest.py | 16 +- 21 files changed, 492 insertions(+), 452 deletions(-) diff --git a/tools/gitignore/gitignore.py b/tools/gitignore/gitignore.py index 0f3d9450d12344..2a2761518663ca 100644 --- a/tools/gitignore/gitignore.py +++ b/tools/gitignore/gitignore.py @@ -1,7 +1,7 @@ import re import os import itertools -from six import itervalues, iteritems +from six import ensure_binary, itervalues, iteritems from collections import defaultdict MYPY = False @@ -26,70 +26,72 @@ def fnmatch_translate(pat): - # type: (str) -> Tuple[bool, Pattern[str]] + # type: (bytes) -> Tuple[bool, Pattern[bytes]] parts = [] seq = None i = 0 - any_char = "[^/]" - if pat[0] == "/": - parts.append("^") + any_char = b"[^/]" + if pat[0:1] == b"/": + parts.append(b"^") pat = pat[1:] else: # By default match the entire path up to a / # but if / doesn't appear in the pattern we will mark is as # a name pattern and just produce a pattern that matches against # the filename - parts.append("^(?:.*/)?") + parts.append(b"^(?:.*/)?") name_pattern = True - if pat[-1] == "/": + if pat[-1:] == b"/": # If the last character is / match this directory or any subdirectory pat = pat[:-1] - suffix = "(?:/|$)" + suffix = b"(?:/|$)" 
else: - suffix = "$" + suffix = b"$" while i < len(pat): - c = pat[i] - if c == "\\": + c = pat[i:i+1] + if c == b"\\": if i < len(pat) - 1: i += 1 - c = pat[i] + c = pat[i:i+1] parts.append(re.escape(c)) else: raise ValueError elif seq is not None: # TODO: this doesn't really handle invalid sequences in the right way - if c == "]": + if c == b"]": seq = None - if parts[-1] == "[": + if parts[-1:] == b"[": parts = parts[:-1] - elif parts[-1] == "^" and parts[-2] == "[": + elif parts[-1:] == b"^" and parts[-2:-1] == b"[": parts = parts[:-2] else: parts.append(c) - elif c == "-": + elif c == b"-": parts.append(c) + elif c == b"[": + raise ValueError else: - parts += re.escape(c) - elif c == "[": - parts.append("[") - if i < len(pat) - 1 and pat[i+1] in ("!", "^"): - parts.append("^") + parts.append(re.escape(c)) + elif c == b"[": + parts.append(b"[") + if i < len(pat) - 1 and pat[i+1:i+2] in (b"!", b"^"): + parts.append(b"^") i += 1 seq = i - elif c == "*": - if i < len(pat) - 1 and pat[i+1] == "*": - if i > 0 and pat[i-1] != "/": + elif c == b"*": + if i < len(pat) - 1 and pat[i+1:i+2] == b"*": + if i > 0 and pat[i-1:i] != b"/": raise ValueError - parts.append(".*") + parts.append(b".*") i += 1 - if i < len(pat) - 1 and pat[i+1] != "/": + if i < len(pat) - 1 and pat[i+1:i+2] != b"/": raise ValueError else: - parts.append(any_char + "*") - elif c == "?": + parts.append(any_char + b"*") + elif c == b"?": parts.append(any_char) - elif c == "/" and not seq: + elif c == b"/" and not seq: name_pattern = False parts.append(c) else: @@ -97,31 +99,31 @@ def fnmatch_translate(pat): i += 1 if name_pattern: - parts[0] = "^" + parts[0] = b"^" if seq is not None: raise ValueError parts.append(suffix) try: - return name_pattern, re.compile("".join(parts)) + return name_pattern, re.compile(b"".join(parts)) except Exception: raise ValueError # Regexp matching rules that have to be converted to patterns -pattern_re = re.compile(r".*[\*\[\?]") +pattern_re = re.compile(br".*[\*\[\?]") 
def parse_line(line): - # type: (str) -> Optional[Tuple[bool, bool, bool, Union[Tuple[str, ...], Tuple[bool, Pattern[str]]]]] + # type: (bytes) -> Optional[Tuple[bool, bool, bool, Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]]] line = line.rstrip() - if not line or line[0] == "#": + if not line or line[0:1] == b"#": return None - invert = line[0] == "!" + invert = line[0:1] == b"!" if invert: line = line[1:] - dir_only = line[-1] == "/" + dir_only = line[-1:] == b"/" if dir_only: line = line[:-1] @@ -129,7 +131,7 @@ def parse_line(line): # Could make a special case for **/foo, but we don't have any patterns like that if not invert and not pattern_re.match(line): literal = True - pattern = tuple(line.rsplit("/", 1)) # type: Union[Tuple[str, ...], Tuple[bool, Pattern[str]]] + pattern = tuple(line.rsplit(b"/", 1)) # type: Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]] else: pattern = fnmatch_translate(line) literal = False @@ -139,9 +141,9 @@ def parse_line(line): class PathFilter(object): def __init__(self, root, extras=None, cache=None): - # type: (str, Optional[List[str]], Optional[MutableMapping[str, bool]]) -> None + # type: (bytes, Optional[List[bytes]], Optional[MutableMapping[bytes, bool]]) -> None if root: - ignore_path = os.path.join(root, ".gitignore") # type: Optional[str] + ignore_path = os.path.join(root, b".gitignore") # type: Optional[bytes] else: ignore_path = None if not ignore_path and not extras: @@ -149,32 +151,32 @@ def __init__(self, root, extras=None, cache=None): return self.trivial = False - self.literals_file = defaultdict(dict) # type: Dict[Optional[str], Dict[str, List[Tuple[bool, Pattern[str]]]]] - self.literals_dir = defaultdict(dict) # type: Dict[Optional[str], Dict[str, List[Tuple[bool, Pattern[str]]]]] - self.patterns_file = [] # type: List[Tuple[Tuple[bool, Pattern[str]], List[Tuple[bool, Pattern[str]]]]] - self.patterns_dir = [] # type: List[Tuple[Tuple[bool, Pattern[str]], List[Tuple[bool, Pattern[str]]]]] - 
self.cache = cache or {} # type: MutableMapping[str, bool] + self.literals_file = defaultdict(dict) # type: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] + self.literals_dir = defaultdict(dict) # type: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] + self.patterns_file = [] # type: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] + self.patterns_dir = [] # type: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] + self.cache = cache or {} # type: MutableMapping[bytes, bool] if extras is None: extras = [] if ignore_path and os.path.exists(ignore_path): - args = ignore_path, extras # type: Tuple[Optional[str], List[str]] + args = ignore_path, extras # type: Tuple[Optional[bytes], List[bytes]] else: args = None, extras self._read_ignore(*args) def _read_ignore(self, ignore_path, extras): - # type: (Optional[str], List[str]) -> None + # type: (Optional[bytes], List[bytes]) -> None if ignore_path is not None: - with open(ignore_path) as f: + with open(ignore_path, "rb") as f: for line in f: self._read_line(line) for line in extras: self._read_line(line) def _read_line(self, line): - # type: (str) -> None + # type: (bytes) -> None parsed = parse_line(line) if not parsed: return @@ -186,13 +188,13 @@ def _read_line(self, line): # overriden by an exclude rule assert not literal if MYPY: - rule = cast(Tuple[bool, Pattern[str]], rule) + rule = cast(Tuple[bool, Pattern[bytes]], rule) if not dir_only: rules_iter = itertools.chain( itertools.chain(*(iteritems(item) for item in itervalues(self.literals_dir))), itertools.chain(*(iteritems(item) for item in itervalues(self.literals_file))), self.patterns_dir, - self.patterns_file) # type: Iterable[Tuple[Any, List[Tuple[bool, Pattern[str]]]]] + self.patterns_file) # type: Iterable[Tuple[Any, List[Tuple[bool, Pattern[bytes]]]]] else: rules_iter = itertools.chain( itertools.chain(*(iteritems(item) for item in 
itervalues(self.literals_dir))), @@ -203,9 +205,9 @@ def _read_line(self, line): else: if literal: if MYPY: - rule = cast(Tuple[str, ...], rule) + rule = cast(Tuple[bytes, ...], rule) if len(rule) == 1: - dir_name, pattern = None, rule[0] # type: Tuple[Optional[str], str] + dir_name, pattern = None, rule[0] # type: Tuple[Optional[bytes], bytes] else: dir_name, pattern = rule self.literals_dir[dir_name][pattern] = [] @@ -213,31 +215,31 @@ def _read_line(self, line): self.literals_file[dir_name][pattern] = [] else: if MYPY: - rule = cast(Tuple[bool, Pattern[str]], rule) + rule = cast(Tuple[bool, Pattern[bytes]], rule) self.patterns_dir.append((rule, [])) if not dir_only: self.patterns_file.append((rule, [])) def filter(self, - iterator # type: Iterable[Tuple[str, List[Tuple[str, T]], List[Tuple[str, T]]]] + iterator # type: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] ): - # type: (...) -> Iterable[Tuple[str, List[Tuple[str, T]], List[Tuple[str, T]]]] + # type: (...) 
-> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] empty = {} # type: Dict[Any, Any] for dirpath, dirnames, filenames in iterator: orig_dirpath = dirpath - if os.path.sep != "/": - dirpath = dirpath.replace(os.path.sep, "/") + if ensure_binary(os.path.sep) != b"/": + dirpath = dirpath.replace(ensure_binary(os.path.sep), b"/") - keep_dirs = [] # type: List[Tuple[str, T]] - keep_files = [] # type: List[Tuple[str, T]] + keep_dirs = [] # type: List[Tuple[bytes, T]] + keep_files = [] # type: List[Tuple[bytes, T]] for iter_items, literals, patterns, target, suffix in [ - (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, "/"), - (filenames, self.literals_file, self.patterns_file, keep_files, "")]: + (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, b"/"), + (filenames, self.literals_file, self.patterns_file, keep_files, b"")]: for item in iter_items: name = item[0] if dirpath: - path = "%s/%s" % (dirpath, name) + suffix + path = b"%s/%s" % (dirpath, name) + suffix else: path = name + suffix if path in self.cache: @@ -269,13 +271,13 @@ def filter(self, target.append(item) dirnames[:] = keep_dirs - assert not any(".git" == name for name, _ in dirnames) + assert not any(b".git" == name for name, _ in dirnames) yield orig_dirpath, dirnames, keep_files def __call__(self, - iterator # type: Iterable[Tuple[str, List[Tuple[str, T]], List[Tuple[str, T]]]] + iterator # type: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] ): - # type: (...) -> Iterable[Tuple[str, List[Tuple[str, T]], List[Tuple[str, T]]]] + # type: (...) 
-> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] if self.trivial: return iterator @@ -283,5 +285,5 @@ def __call__(self, def has_ignore(dirpath): - # type: (str) -> bool - return os.path.exists(os.path.join(dirpath, ".gitignore")) + # type: (bytes) -> bool + return os.path.exists(os.path.join(dirpath, b".gitignore")) diff --git a/tools/gitignore/tests/test_gitignore.py b/tools/gitignore/tests/test_gitignore.py index 8131a71875b4d5..317f3799b62bb2 100644 --- a/tools/gitignore/tests/test_gitignore.py +++ b/tools/gitignore/tests/test_gitignore.py @@ -10,62 +10,61 @@ from typing import Sequence match_data = [ - ("foo", True, ["a/foo", "foo"]), - ("*.a", True, ["foo.a", "a/foo.a", "a/b/foo.a", "a.a/foo.a"]), - ("*.py[co]", True, ["a.pyc", "a.pyo", "a/b/c.pyc"]), - ("\\#*", True, ["#a", "a/#b"]), - ("*#", True, ["a#", "a/b#", "#a#"]), - ("/*.c", True, ["a.c", ".c"]), - ("**/b", False, ["a/b", "a/c/b"]), - ("*b", True, ["ab"]), - ("*b", True, ["a/b"]), - ("**/b", False, ["a/b"]), - ("a/", True, ["a"]), - ("a[/]b", True, []), - ("**/b", False, ["a/c/b"]), - ("a?c", True, ["abc"]), - ("a[^b]c", True, ["acc"]), - ("a[b-c]c", True, ["abc", "acc"]), - ("a[^]c", True, ["ac"]), # This is probably wrong - ("a[^]c", True, ["ac"]), # This is probably wrong -] # type: Sequence[Tuple[str, bool, Iterable[str]]] + (b"foo", True, [b"a/foo", b"foo"]), + (b"*.a", True, [b"foo.a", b"a/foo.a", b"a/b/foo.a", b"a.a/foo.a"]), + (b"*.py[co]", True, [b"a.pyc", b"a.pyo", b"a/b/c.pyc"]), + (b"\\#*", True, [b"#a", b"a/#b"]), + (b"*#", True, [b"a#", b"a/b#", b"#a#"]), + (b"/*.c", True, [b"a.c", b".c"]), + (b"**/b", False, [b"a/b", b"a/c/b"]), + (b"*b", True, [b"ab"]), + (b"*b", True, [b"a/b"]), + (b"**/b", False, [b"a/b"]), + (b"a/", True, [b"a"]), + (b"a[/]b", True, []), + (b"**/b", False, [b"a/c/b"]), + (b"a?c", True, [b"abc"]), + (b"a[^b]c", True, [b"acc"]), + (b"a[b-c]c", True, [b"abc", b"acc"]), +] # type: Sequence[Tuple[bytes, bool, Iterable[bytes]]] mismatch_data = [ - 
("foo", True, ["foob", "afoo"]), - ("*.a", True, ["a", "foo:a", "a.a/foo"]), - ("*.py[co]", True, ["a.pyd", "pyo", "a.py"]), - ("a", True, ["ab"]), - ("a?c", True, ["ac", "abbc"]), - ("a[^b]c", True, ["abc"]), - ("a[b-c]c", True, ["adc"]), -] # type: Sequence[Tuple[str, bool, Iterable[str]]] + (b"foo", True, [b"foob", b"afoo"]), + (b"*.a", True, [b"a", b"foo:a", b"a.a/foo"]), + (b"*.py[co]", True, [b"a.pyd", b"pyo", b"a.py"]), + (b"a", True, [b"ab"]), + (b"a?c", True, [b"ac", b"abbc"]), + (b"a[^b]c", True, [b"abc"]), + (b"a[b-c]c", True, [b"adc"]), +] # type: Sequence[Tuple[bytes, bool, Iterable[bytes]]] invalid_data = [ - "[a", - "***/foo", - "a\\", - "**b", - "b**/", - "[[]" + b"[a", + b"***/foo", + b"a\\", + b"**b", + b"b**/", + b"[[]", + b"a[^]c", ] filter_data = [ - (["foo", "bar/", "/a", "*.py"], - [("", ["foo", "bar", "baz"], ["a"]), - ("baz", ["a"], ["foo", "bar"])], - [(["baz"], []), - (["a"], ["bar"])]), - (["#foo", "", "a*", "!a.py"], - [("", ["foo"], ["a", "a.foo", "a.py"])], - [(["foo"], ["a.py"])]), - (["a.foo", "!a.py"], - [("", ["foo"], ["a", "a.foo", "a.py"])], - [(["foo"], ["a", "a.py"])]), + ([b"foo", b"bar/", b"/a", b"*.py"], + [(b"", [b"foo", b"bar", b"baz"], [b"a"]), + (b"baz", [b"a"], [b"foo", b"bar"])], + [([b"baz"], []), + ([b"a"], [b"bar"])]), + ([b"#foo", b"", b"a*", b"!a.py"], + [(b"", [b"foo"], [b"a", b"a.foo", b"a.py"])], + [([b"foo"], [b"a.py"])]), + ([b"a.foo", b"!a.py"], + [(b"", [b"foo"], [b"a", b"a.foo", b"a.py"])], + [([b"foo"], [b"a", b"a.py"])]), ] def expand_data(compact_data): - # type: (Sequence[Tuple[str, bool, Iterable[str]]]) -> Iterable[Tuple[str, bool, str]] + # type: (Sequence[Tuple[bytes, bool, Iterable[bytes]]]) -> Iterable[Tuple[bytes, bool, bytes]] for pattern, name_only, inputs in compact_data: for input in inputs: yield pattern, name_only, input @@ -76,7 +75,7 @@ def tests_match(pattern, name_only, input): name_only_result, regexp = fnmatch_translate(pattern) assert name_only_result == name_only if name_only: - 
input = input.rsplit("/", 1)[-1] + input = input.rsplit(b"/", 1)[-1] assert regexp.match(input) is not None @@ -85,7 +84,7 @@ def tests_no_match(pattern, name_only, input): name_only_result, regexp = fnmatch_translate(pattern) assert name_only_result == name_only if name_only: - input = input.rsplit("/", 1)[-1] + input = input.rsplit(b"/", 1)[-1] assert regexp.match(input) is None diff --git a/tools/lint/fnmatch.py b/tools/lint/fnmatch.py index 3d9ee23c97e5ba..0c45029b23915f 100644 --- a/tools/lint/fnmatch.py +++ b/tools/lint/fnmatch.py @@ -6,23 +6,23 @@ MYPY = False if MYPY: # MYPY is set to True when run under Mypy. - from typing import AnyStr from typing import Iterable from typing import List + from typing import Text __all__ = ["fnmatch", "fnmatchcase", "filter", "translate"] def fnmatch(name, pat): - # type: (AnyStr, AnyStr) -> bool + # type: (Text, Text) -> bool name = os.path.normcase(name) pat = os.path.normcase(pat) return fnmatchcase(name, pat) def fnmatchcase(name, pat): - # type: (AnyStr, AnyStr) -> bool + # type: (Text, Text) -> bool if '?' 
not in pat and '[' not in pat: wildcards = pat.count("*") if wildcards == 0: @@ -35,7 +35,7 @@ def fnmatchcase(name, pat): def filter(names, pat): - # type: (Iterable[AnyStr], AnyStr) -> List[AnyStr] + # type: (Iterable[Text], Text) -> List[Text] return [n for n in names if fnmatch(n, pat)] diff --git a/tools/lint/lint.py b/tools/lint/lint.py index fc1a816f8abf85..24d190c23b37a9 100644 --- a/tools/lint/lint.py +++ b/tools/lint/lint.py @@ -3,6 +3,7 @@ import abc import argparse import ast +import io import json import logging import os @@ -21,7 +22,7 @@ from ..manifest.vcs import walk from ..manifest.sourcefile import SourceFile, js_meta_re, python_meta_re, space_chars, get_any_variants -from six import binary_type, iteritems, itervalues, with_metaclass +from six import binary_type, ensure_binary, ensure_text, iteritems, itervalues, with_metaclass from six.moves import range from six.moves.urllib.parse import urlsplit, urljoin @@ -39,7 +40,6 @@ from typing import Text from typing import Tuple from typing import Type - from typing import Union # The Ignorelist is a two level dictionary. The top level is indexed by # error names (e.g. 'TRAILING WHITESPACE'). 
Each of those then has a map of @@ -51,6 +51,7 @@ logger = None # type: Optional[logging.Logger] + def setup_logging(prefix=False): # type: (bool) -> None global logger @@ -89,24 +90,27 @@ def setup_logging(prefix=False): %s: %s""" + def all_filesystem_paths(repo_root, subdir=None): - # type: (str, Optional[str]) -> Iterable[str] - path_filter = PathFilter(repo_root, extras=[str(".git/")]) + # type: (Text, Optional[Text]) -> Iterable[Text] + path_filter = PathFilter(repo_root.encode("utf8"), + extras=[ensure_binary(".git/")]) if subdir: - expanded_path = subdir + expanded_path = subdir.encode("utf8") + subdir_str = expanded_path else: - expanded_path = repo_root + expanded_path = repo_root.encode("utf8") for dirpath, dirnames, filenames in path_filter(walk(expanded_path)): for filename, _ in filenames: path = os.path.join(dirpath, filename) if subdir: - path = os.path.join(subdir, path) + path = os.path.join(subdir_str, path) assert not os.path.isabs(path), path - yield path + yield ensure_text(path) def _all_files_equal(paths): - # type: (Iterable[str]) -> bool + # type: (Iterable[Text]) -> bool """ Checks all the paths are files that are byte-for-byte identical @@ -145,21 +149,21 @@ def _all_files_equal(paths): def check_path_length(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] if len(path) + 1 > 150: return [rules.PathLength.error(path, (path, len(path) + 1))] return [] def check_file_type(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] if os.path.islink(path): return [rules.FileType.error(path, (path, "symlink"))] return [] def check_worker_collision(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] endings = [(".any.html", ".any.js"), (".any.worker.html", ".any.js"), (".worker.html", ".worker.js")] @@ -170,7 +174,7 @@ def check_worker_collision(repo_root, path): def 
check_gitignore_file(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] if not path.endswith(".gitignore"): return [] @@ -190,7 +194,7 @@ def check_gitignore_file(repo_root, path): def check_ahem_copy(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] lpath = path.lower() if "ahem" in lpath and lpath.endswith(".ttf"): return [rules.AhemCopy.error(path)] @@ -198,7 +202,7 @@ def check_ahem_copy(repo_root, path): def check_git_ignore(repo_root, paths): - # type: (str, List[str]) -> List[rules.Error] + # type: (Text, List[Text]) -> List[rules.Error] errors = [] with tempfile.TemporaryFile('w+') as f: f.write('\n'.join(paths)) @@ -206,12 +210,13 @@ def check_git_ignore(repo_root, paths): try: matches = subprocess.check_output( ["git", "check-ignore", "--verbose", "--no-index", "--stdin"], stdin=f) - for match in matches.strip().split('\n'): - match_filter, path = match.split() - _, _, filter_string = match_filter.split(':') + for match in matches.strip().split(b'\n'): + match_filter, path_bytes = match.split() + _, _, filter_string = match_filter.split(b':') # If the matching filter reported by check-ignore is a special-case exception, # that's fine. Otherwise, it requires a new special-case exception. - if filter_string[0] != '!': + if filter_string[0:1] != b'!': + path = path_bytes.decode("utf8") errors.append(rules.IgnoredPath.error(path, (path,))) except subprocess.CalledProcessError: # Nonzero return code means that no match exists. 
@@ -225,7 +230,7 @@ def check_git_ignore(repo_root, paths): def check_css_globally_unique(repo_root, paths): - # type: (str, List[str]) -> List[rules.Error] + # type: (Text, List[Text]) -> List[rules.Error] """ Checks that CSS filenames are sufficiently unique @@ -243,32 +248,29 @@ def check_css_globally_unique(repo_root, paths): :returns: a list of errors found in ``paths`` """ - test_files = defaultdict(set) # type: Dict[Union[bytes, Text], Set[str]] - ref_files = defaultdict(set) # type: Dict[Union[bytes, Text], Set[str]] - support_files = defaultdict(set) # type: Dict[Union[bytes, Text], Set[str]] + test_files = defaultdict(set) # type: Dict[Text, Set[Text]] + ref_files = defaultdict(set) # type: Dict[Text, Set[Text]] + support_files = defaultdict(set) # type: Dict[Text, Set[Text]] for path in paths: if os.name == "nt": - if isinstance(path, binary_type): - path = path.replace(b"\\", b"/") - else: - path = path.replace(u"\\", u"/") + path = path.replace(u"\\", u"/") - if not path.startswith("css/"): + if not path.startswith(u"css/"): continue - source_file = SourceFile(repo_root, path, "/") + source_file = SourceFile(repo_root, path, u"/") if source_file.name_is_non_test: # If we're name_is_non_test for a reason apart from support, ignore it. # We care about support because of the requirement all support files in css/ to be in # a support directory; see the start of check_parsed. 
- offset = path.find("/support/") + offset = path.find(u"/support/") if offset == -1: continue parts = source_file.dir_path.split(os.path.sep) if (parts[0] in source_file.root_dir_non_test or - any(item in source_file.dir_non_test - {"support"} for item in parts) or + any(item in source_file.dir_non_test - {u"support"} for item in parts) or any(parts[:len(non_test_path)] == list(non_test_path) for non_test_path in source_file.dir_path_non_test)): continue @@ -277,11 +279,8 @@ def check_css_globally_unique(repo_root, paths): elif source_file.name_is_reference: ref_files[source_file.name].add(path) else: - test_name = source_file.name # type: Union[bytes, Text] - if isinstance(test_name, bytes): - test_name = test_name.replace(b'-manual', b'') - else: - test_name = test_name.replace(u'-manual', u'') + test_name = source_file.name # type: Text + test_name = test_name.replace(u'-manual', u'') test_files[test_name].add(path) errors = [] @@ -290,9 +289,9 @@ def check_css_globally_unique(repo_root, paths): if len(colliding) > 1: if not _all_files_equal([os.path.join(repo_root, x) for x in colliding]): # Only compute by_spec if there are prima-facie collisions because of cost - by_spec = defaultdict(set) # type: Dict[Text, Set[str]] + by_spec = defaultdict(set) # type: Dict[Text, Set[Text]] for path in colliding: - source_file = SourceFile(repo_root, path, "/") + source_file = SourceFile(repo_root, path, u"/") for link in source_file.spec_links: for r in (drafts_csswg_re, w3c_tr_re, w3c_dev_re): m = r.match(link) @@ -324,7 +323,7 @@ def check_css_globally_unique(repo_root, paths): def check_unique_testharness_basenames(repo_root, paths): - # type: (str, List[str]) -> List[rules.Error] + # type: (Text, List[Text]) -> List[rules.Error] """ Checks that all testharness files have unique basename paths. 
@@ -359,7 +358,7 @@ def check_unique_testharness_basenames(repo_root, paths): def parse_ignorelist(f): - # type: (IO[bytes]) -> Tuple[Ignorelist, Set[Text]] + # type: (IO[Text]) -> Tuple[Ignorelist, Set[Text]] """ Parse the ignorelist file given by `f`, and return the parsed structure. @@ -440,7 +439,7 @@ def filter_ignorelist_errors(data, errors): def check_regexp_line(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] errors = [] # type: List[rules.Error] applicable_regexps = [regexp for regexp in regexps if regexp.applies(path)] @@ -454,7 +453,7 @@ def check_regexp_line(repo_root, path, f): def check_parsed(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] source_file = SourceFile(repo_root, path, "/", contents=f.read()) errors = [] # type: List[rules.Error] @@ -630,7 +629,7 @@ def check(self, root): ast_checkers = [item() for item in [OpenModeCheck]] def check_python_ast(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] # *.quic.py are Python 3 only and cannot be parsed by Python 2. 
if not path.endswith(".py") or path.endswith(".quic.py"): return [] @@ -652,7 +651,7 @@ def check_python_ast(repo_root, path, f): def check_global_metadata(value): - # type: (str) -> Iterable[Tuple[Type[rules.Rule], Tuple[Any, ...]]] + # type: (bytes) -> Iterable[Tuple[Type[rules.Rule], Tuple[Any, ...]]] global_values = {item.strip().decode("utf8") for item in value.split(b",") if item.strip()} # TODO: this could check for duplicates and such @@ -662,7 +661,7 @@ def check_global_metadata(value): def check_script_metadata(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] if path.endswith((".worker.js", ".any.js")): meta_re = js_meta_re broken_metadata = broken_js_metadata @@ -713,7 +712,7 @@ def check_script_metadata(repo_root, path, f): def check_ahem_system_font(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] if not path.endswith((".html", ".htm", ".xht", ".xhtml")): return [] contents = f.read() @@ -724,7 +723,7 @@ def check_ahem_system_font(repo_root, path, f): def check_path(repo_root, path): - # type: (str, str) -> List[rules.Error] + # type: (Text, Text) -> List[rules.Error] """ Runs lints that check the file path. @@ -740,7 +739,7 @@ def check_path(repo_root, path): def check_all_paths(repo_root, paths): - # type: (str, List[str]) -> List[rules.Error] + # type: (Text, List[Text]) -> List[rules.Error] """ Runs lints that check all paths globally. @@ -756,7 +755,7 @@ def check_all_paths(repo_root, paths): def check_file_contents(repo_root, path, f): - # type: (str, str, IO[bytes]) -> List[rules.Error] + # type: (Text, Text, IO[bytes]) -> List[rules.Error] """ Runs lints that check the file contents. 
@@ -824,25 +823,23 @@ def output_error_count(error_count): def changed_files(wpt_root): - # type: (str) -> List[Text] + # type: (Text) -> List[Text] revish = testfiles.get_revish(revish=None) changed, _ = testfiles.files_changed(revish, None, include_uncommitted=True, include_new=True) return [os.path.relpath(item, wpt_root) for item in changed] def lint_paths(kwargs, wpt_root): - # type: (Dict[str, Any], str) -> List[str] - if kwargs.get(str("paths")): + # type: (Dict[Text, Any], Text) -> List[Text] + if kwargs.get("paths"): paths = [] - for path in kwargs.get(str("paths"), []): + for path in kwargs.get("paths", []): if os.path.isdir(path): path_dir = list(all_filesystem_paths(wpt_root, path)) paths.extend(path_dir) elif os.path.isfile(path): paths.append(os.path.relpath(os.path.abspath(path), wpt_root)) - - - elif kwargs[str("all")]: + elif kwargs["all"]: paths = list(all_filesystem_paths(wpt_root)) else: changed_paths = changed_files(wpt_root) @@ -852,7 +849,7 @@ def lint_paths(kwargs, wpt_root): if path == "lint.ignore" or path.startswith("tools/lint/"): force_all = True break - paths = (list(changed_paths) if not force_all # type: ignore + paths = (list(changed_paths) if not force_all else list(all_filesystem_paths(wpt_root))) return paths @@ -867,43 +864,47 @@ def create_parser(): help="Output machine-readable JSON format") parser.add_argument("--markdown", action="store_true", help="Output markdown") - parser.add_argument("--repo-root", help="The WPT directory. Use this " + parser.add_argument("--repo-root", type=ensure_text, + help="The WPT directory. 
Use this " "option if the lint script exists outside the repository") - parser.add_argument("--ignore-glob", help="Additional file glob to ignore.") + parser.add_argument("--ignore-glob", type=ensure_text, + help="Additional file glob to ignore.") parser.add_argument("--all", action="store_true", help="If no paths are passed, try to lint the whole " "working directory, not just files that changed") return parser -def main(**kwargs): +def main(**kwargs_str): # type: (**Any) -> int + kwargs = {ensure_text(key): value for key, value in iteritems(kwargs_str)} + assert logger is not None - if kwargs.get(str("json")) and kwargs.get(str("markdown")): + if kwargs.get("json") and kwargs.get("markdown"): logger.critical("Cannot specify --json and --markdown") sys.exit(2) - repo_root = kwargs.get(str('repo_root')) or localpaths.repo_root - output_format = {(True, False): str("json"), - (False, True): str("markdown"), - (False, False): str("normal")}[(kwargs.get(str("json"), False), - kwargs.get(str("markdown"), False))] + repo_root = kwargs.get('repo_root') or localpaths.repo_root + output_format = {(True, False): "json", + (False, True): "markdown", + (False, False): "normal"}[(kwargs.get("json", False), + kwargs.get("markdown", False))] if output_format == "markdown": setup_logging(True) paths = lint_paths(kwargs, repo_root) - ignore_glob = kwargs.get(str("ignore_glob")) or str() + ignore_glob = kwargs.get("ignore_glob") or "" - return lint(repo_root, paths, output_format, str(ignore_glob)) + return lint(repo_root, paths, output_format, ignore_glob) -def lint(repo_root, paths, output_format, ignore_glob=str()): - # type: (str, List[str], str, str) -> int +def lint(repo_root, paths, output_format, ignore_glob=""): + # type: (Text, List[Text], Text, Text) -> int error_count = defaultdict(int) # type: Dict[Text, int] last = None - with open(os.path.join(repo_root, "lint.ignore")) as f: + with io.open(os.path.join(repo_root, "lint.ignore"), "r") as f: ignorelist, skipped_files 
= parse_ignorelist(f) if ignore_glob: @@ -948,8 +949,8 @@ def process_errors(errors): last = process_errors(errors) or last if not os.path.isdir(abs_path): - with open(abs_path, 'rb') as f: - errors = check_file_contents(repo_root, path, f) + with io.open(abs_path, 'rb') as test_file: + errors = check_file_contents(repo_root, path, test_file) last = process_errors(errors) or last errors = check_all_paths(repo_root, paths) diff --git a/tools/lint/rules.py b/tools/lint/rules.py index 695f6cd4e5328c..6ffd749b9ad539 100644 --- a/tools/lint/rules.py +++ b/tools/lint/rules.py @@ -354,7 +354,7 @@ def __init__(self): self._re = re.compile(self.pattern) # type: Pattern[bytes] def applies(self, path): - # type: (str) -> bool + # type: (Text) -> bool return (self.file_extensions is None or os.path.splitext(path)[1] in self.file_extensions) diff --git a/tools/lint/tests/test_lint.py b/tools/lint/tests/test_lint.py index 10d9728b405751..3741f20097c69f 100644 --- a/tools/lint/tests/test_lint.py +++ b/tools/lint/tests/test_lint.py @@ -483,12 +483,12 @@ def test_ignore_glob(caplog): def test_all_filesystem_paths(): with mock.patch( 'tools.lint.lint.walk', - return_value=[('', - [('dir_a', None), ('dir_b', None)], - [('file_a', None), ('file_b', None)]), - ('dir_a', + return_value=[(b'', + [(b'dir_a', None), (b'dir_b', None)], + [(b'file_a', None), (b'file_b', None)]), + (b'dir_a', [], - [('file_c', None), ('file_d', None)])] + [(b'file_c', None), (b'file_d', None)])] ): got = list(lint_mod.all_filesystem_paths('.')) assert got == ['file_a', @@ -500,12 +500,12 @@ def test_all_filesystem_paths(): def test_filesystem_paths_subdir(): with mock.patch( 'tools.lint.lint.walk', - return_value=[('', - [('dir_a', None), ('dir_b', None)], - [('file_a', None), ('file_b', None)]), - ('dir_a', + return_value=[(b'', + [(b'dir_a', None), (b'dir_b', None)], + [(b'file_a', None), (b'file_b', None)]), + (b'dir_a', [], - [('file_c', None), ('file_d', None)])] + [(b'file_c', None), (b'file_d', 
None)])] ): got = list(lint_mod.all_filesystem_paths('.', 'dir')) assert got == [os.path.join('dir', 'file_a'), diff --git a/tools/localpaths.py b/tools/localpaths.py index ce3b41e300c9f1..e1aa644988b2be 100644 --- a/tools/localpaths.py +++ b/tools/localpaths.py @@ -1,8 +1,8 @@ import os import sys +from six import ensure_text here = os.path.abspath(os.path.split(__file__)[0]) -repo_root = os.path.abspath(os.path.join(here, os.pardir)) sys.path.insert(0, os.path.join(here)) sys.path.insert(0, os.path.join(here, "wptserve")) @@ -28,3 +28,5 @@ if sys.version_info[0] == 2: sys.path.insert(0, os.path.join(here, "third_party", "enum")) + +repo_root = ensure_text(os.path.abspath(os.path.join(here, os.pardir))) diff --git a/tools/manifest/XMLParser.py b/tools/manifest/XMLParser.py index 6f5ff4d35dab67..80aa3b5b920b67 100644 --- a/tools/manifest/XMLParser.py +++ b/tools/manifest/XMLParser.py @@ -6,6 +6,8 @@ from xml.parsers import expat import xml.etree.ElementTree as etree # noqa: N813 +from six import text_type + MYPY = False if MYPY: # MYPY is set to True when run under Mypy. @@ -24,9 +26,9 @@ def _wrap_error(e): err.position = e.lineno, e.offset raise err -_names = {} # type: Dict[str, str] +_names = {} # type: Dict[Text, Text] def _fixname(key): - # type: (str) -> str + # type: (Text) -> Text try: name = _names[key] except KeyError: @@ -54,7 +56,7 @@ class XMLParser(object): Python does, rather than just those supported by expat. """ def __init__(self, encoding=None): - # type: (Optional[str]) -> None + # type: (Optional[Text]) -> None self._parser = expat.ParserCreate(encoding, "}") self._target = etree.TreeBuilder() # parser settings @@ -63,6 +65,9 @@ def __init__(self, encoding=None): self._parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) # parser callbacks self._parser.XmlDeclHandler = self._xml_decl + # mypy generates a type error in py2 because it wants + # StartElementHandler to take str, List[str]. 
But the code + # seems to always pass in Text to this function self._parser.StartElementHandler = self._start self._parser.EndElementHandler = self._end self._parser.CharacterDataHandler = self._data @@ -70,32 +75,33 @@ def __init__(self, encoding=None): self._parser.SkippedEntityHandler = self._skipped # type: ignore # used for our horrible re-encoding hack self._fed_data = [] # type: Optional[List[bytes]] - self._read_encoding = None # type: Optional[str] + self._read_encoding = None # type: Optional[Text] def _xml_decl(self, version, encoding, standalone): - # type: (str, Optional[str], int) -> None + # type: (Text, Optional[Text], int) -> None self._read_encoding = encoding def _start(self, tag, attrib_in): - # type: (str, List[str]) -> etree.Element + # type: (Text, List[str]) -> etree.Element + assert isinstance(tag, text_type) self._fed_data = None tag = _fixname(tag) - attrib = OrderedDict() # type: Dict[Union[str, Text], Union[str, Text]] + attrib = OrderedDict() # type: Dict[Union[bytes, Text], Union[bytes, Text]] if attrib_in: for i in range(0, len(attrib_in), 2): attrib[_fixname(attrib_in[i])] = attrib_in[i+1] return self._target.start(tag, attrib) def _data(self, text): - # type: (str) -> None + # type: (Text) -> None self._target.data(text) def _end(self, tag): - # type: (str) -> etree.Element + # type: (Text) -> etree.Element return self._target.end(_fixname(tag)) def _external(self, context, base, system_id, public_id): - # type: (str, Optional[str], Optional[str], Optional[str]) -> bool + # type: (Text, Optional[Text], Optional[Text], Optional[Text]) -> bool if public_id in { "-//W3C//DTD XHTML 1.0 Transitional//EN", "-//W3C//DTD XHTML 1.1//EN", @@ -117,7 +123,7 @@ def _external(self, context, base, system_id, public_id): return True def _skipped(self, name, is_parameter_entity): - # type: (str, bool) -> None + # type: (Text, bool) -> None err = expat.error("undefined entity %s: line %d, column %d" % (name, self._parser.ErrorLineNumber, 
self._parser.ErrorColumnNumber)) @@ -127,7 +133,7 @@ def _skipped(self, name, is_parameter_entity): raise err def feed(self, data): - # type: (str) -> None + # type: (bytes) -> None if self._fed_data is not None: self._fed_data.append(data) try: diff --git a/tools/manifest/download.py b/tools/manifest/download.py index f8be03efb2750a..9d763181d82db7 100644 --- a/tools/manifest/download.py +++ b/tools/manifest/download.py @@ -35,12 +35,12 @@ def abs_path(path): - # type: (str) -> str + # type: (Text) -> Text return os.path.abspath(os.path.expanduser(path)) def should_download(manifest_path, rebuild_time=timedelta(days=5)): - # type: (str, timedelta) -> bool + # type: (Text, timedelta) -> bool if not os.path.exists(manifest_path): return True mtime = datetime.fromtimestamp(os.path.getmtime(manifest_path)) @@ -51,7 +51,7 @@ def should_download(manifest_path, rebuild_time=timedelta(days=5)): def merge_pr_tags(repo_root, max_count=50): - # type: (str, int) -> List[Text] + # type: (Text, int) -> List[Text] gitfunc = git(repo_root) tags = [] # type: List[Text] if gitfunc is None: @@ -64,7 +64,7 @@ def merge_pr_tags(repo_root, max_count=50): def score_name(name): - # type: (str) -> Optional[int] + # type: (Text) -> Optional[int] """Score how much we like each filename, lower wins, None rejects""" # Accept both ways of naming the manifest asset, even though @@ -111,7 +111,7 @@ def github_url(tags): def download_manifest( - manifest_path, # type: str + manifest_path, # type: Text tags_func, # type: Callable[[], List[Text]] url_func, # type: Callable[[List[Text]], Optional[List[Text]]] force=False # type: bool @@ -159,7 +159,8 @@ def download_manifest( fileobj = io.BytesIO(resp.read()) try: with gzip.GzipFile(fileobj=fileobj) as gzf: - decompressed = gzf.read() # type: ignore + data = read_gzf(gzf) # type: ignore + decompressed = data except IOError: logger.warning("Failed to decompress downloaded file") continue @@ -180,6 +181,12 @@ def download_manifest( return True +def 
read_gzf(gzf): # type: ignore + # This is working around a mypy problem in Python 2: + # "Call to untyped function "read" in typed context" + return gzf.read() + + def create_parser(): # type: () -> argparse.ArgumentParser parser = argparse.ArgumentParser() @@ -194,7 +201,7 @@ def create_parser(): def download_from_github(path, tests_root, force=False): - # type: (str, str, bool) -> bool + # type: (Text, Text, bool) -> bool return download_manifest(path, lambda: merge_pr_tags(tests_root), github_url, force=force) diff --git a/tools/manifest/item.py b/tools/manifest/item.py index e232925ed635c6..efb49d7f4ec9ef 100644 --- a/tools/manifest/item.py +++ b/tools/manifest/item.py @@ -198,7 +198,7 @@ def quic(self): @property def script_metadata(self): - # type: () -> Optional[Text] + # type: () -> Optional[List[Tuple[Text, Text]]] return self._extras.get("script_metadata") def to_json(self): diff --git a/tools/manifest/manifest.py b/tools/manifest/manifest.py index d5d8d53b08b937..449cd245acbc59 100644 --- a/tools/manifest/manifest.py +++ b/tools/manifest/manifest.py @@ -7,7 +7,6 @@ from multiprocessing import Pool, cpu_count from six import ( PY3, - binary_type, ensure_text, iteritems, itervalues, @@ -67,15 +66,15 @@ class InvalidCacheError(Exception): pass -item_classes = {"testharness": TestharnessTest, - "reftest": RefTest, - "print-reftest": PrintRefTest, - "crashtest": CrashTest, - "manual": ManualTest, - "wdspec": WebDriverSpecTest, - "conformancechecker": ConformanceCheckerTest, - "visual": VisualTest, - "support": SupportFile} # type: Dict[str, Type[ManifestItem]] +item_classes = {u"testharness": TestharnessTest, + u"reftest": RefTest, + u"print-reftest": PrintRefTest, + u"crashtest": CrashTest, + u"manual": ManualTest, + u"wdspec": WebDriverSpecTest, + u"conformancechecker": ConformanceCheckerTest, + u"visual": VisualTest, + u"support": SupportFile} # type: Dict[Text, Type[ManifestItem]] def compute_manifest_items(source_file): @@ -104,7 +103,7 @@ def 
__init__(self, manifest): self.json_obj = None # type: None def __setitem__(self, key, value): - # type: (str, TypeData) -> None + # type: (Text, TypeData) -> None if self.initialized: raise AttributeError dict.__setitem__(self, key, value) @@ -120,7 +119,7 @@ def paths(self): return rv def type_by_path(self): - # type: () -> Dict[Tuple[Text, ...], str] + # type: () -> Dict[Tuple[Text, ...], Text] rv = {} for item_type, item_data in iteritems(self): for item in item_data: @@ -128,26 +127,25 @@ def type_by_path(self): return rv - class Manifest(object): - def __init__(self, tests_root=None, url_base="/"): - # type: (Optional[str], Text) -> None + def __init__(self, tests_root, url_base="/"): + # type: (Text, Text) -> None assert url_base is not None self._data = ManifestData(self) # type: ManifestData - self.tests_root = tests_root # type: Optional[str] + self.tests_root = tests_root # type: Text self.url_base = url_base # type: Text def __iter__(self): - # type: () -> Iterator[Tuple[str, Text, Set[ManifestItem]]] + # type: () -> Iterator[Tuple[Text, Text, Set[ManifestItem]]] return self.itertypes() def itertypes(self, *types): - # type: (*str) -> Iterator[Tuple[str, Text, Set[ManifestItem]]] + # type: (*Text) -> Iterator[Tuple[Text, Text, Set[ManifestItem]]] for item_type in (types or sorted(self._data.keys())): for path in self._data[item_type]: - str_path = os.sep.join(path) + rel_path = os.sep.join(path) tests = self._data[item_type][path] - yield item_type, str_path, tests + yield item_type, rel_path, tests def iterpath(self, path): # type: (Text) -> Iterable[ManifestItem] @@ -171,7 +169,7 @@ def iterdir(self, dir_name): yield test def update(self, tree, parallel=True): - # type: (Iterable[Tuple[Union[SourceFile, bytes], bool]], bool) -> bool + # type: (Iterable[Tuple[Text, Optional[Text], bool]], bool) -> bool """Update the manifest given an iterable of items that make up the updated manifest. 
The iterable must either generate tuples of the form (SourceFile, True) for paths @@ -191,9 +189,7 @@ def update(self, tree, parallel=True): to_update = [] - for path_str, file_hash, updated in tree: - assert isinstance(path_str, (binary_type, text_type)) - path = ensure_text(path_str) + for path, file_hash, updated in tree: path_parts = tuple(path.split(os.path.sep)) is_new = path_parts not in remaining_manifest_paths @@ -208,6 +204,7 @@ def update(self, tree, parallel=True): if not updated: remaining_manifest_paths.remove(path_parts) else: + assert self.tests_root is not None source_file = SourceFile(self.tests_root, path, self.url_base, @@ -290,7 +287,7 @@ def to_json(self, caller_owns_obj=True): @classmethod def from_json(cls, tests_root, obj, types=None, callee_owns_obj=False): - # type: (str, Dict[Text, Any], Optional[Container[Text]], bool) -> Manifest + # type: (Text, Dict[Text, Any], Optional[Container[Text]], bool) -> Manifest """Load a manifest from a JSON object This loads a manifest for a given local test_root path from an @@ -327,19 +324,19 @@ def from_json(cls, tests_root, obj, types=None, callee_owns_obj=False): def load(tests_root, manifest, types=None): - # type: (str, Union[IO[bytes], str], Optional[Container[Text]]) -> Optional[Manifest] + # type: (Text, Union[IO[bytes], Text], Optional[Container[Text]]) -> Optional[Manifest] logger = get_logger() logger.warning("Prefer load_and_update instead") return _load(logger, tests_root, manifest, types) -__load_cache = {} # type: Dict[str, Manifest] +__load_cache = {} # type: Dict[Text, Manifest] def _load(logger, # type: Logger - tests_root, # type: str - manifest, # type: Union[IO[bytes], str] + tests_root, # type: Text + manifest, # type: Union[IO[bytes], Text] types=None, # type: Optional[Container[Text]] allow_cached=True # type: bool ): @@ -376,13 +373,13 @@ def _load(logger, # type: Logger return rv -def load_and_update(tests_root, # type: bytes - manifest_path, # type: bytes +def 
load_and_update(tests_root, # type: Union[Text, bytes] + manifest_path, # type: Union[Text, bytes] url_base, # type: Text update=True, # type: bool rebuild=False, # type: bool - metadata_path=None, # type: Optional[bytes] - cache_root=None, # type: Optional[bytes] + metadata_path=None, # type: Optional[Union[Text, bytes]] + cache_root=None, # type: Optional[Union[Text, bytes]] working_copy=True, # type: bool types=None, # type: Optional[Container[Text]] write_manifest=True, # type: bool @@ -390,6 +387,43 @@ def load_and_update(tests_root, # type: bytes parallel=True # type: bool ): # type: (...) -> Manifest + + # This function is now a facade for the purposes of type conversion, so that + # the external API can accept paths as text or (utf8) bytes, but internal + # functions always use Text. + + metadata_path_text = ensure_text(metadata_path) if metadata_path is not None else None + cache_root_text = ensure_text(cache_root) if cache_root is not None else None + + return _load_and_update(ensure_text(tests_root), + ensure_text(manifest_path), + url_base, + update=update, + rebuild=rebuild, + metadata_path=metadata_path_text, + cache_root=cache_root_text, + working_copy=working_copy, + types=types, + write_manifest=write_manifest, + allow_cached=allow_cached, + parallel=parallel) + + +def _load_and_update(tests_root, # type: Text + manifest_path, # type: Text + url_base, # type: Text + update=True, # type: bool + rebuild=False, # type: bool + metadata_path=None, # type: Optional[Text] + cache_root=None, # type: Optional[Text] + working_copy=True, # type: bool + types=None, # type: Optional[Container[Text]] + write_manifest=True, # type: bool + allow_cached=True, # type: bool + parallel=True # type: bool + ): + # type: (...) 
-> Manifest + logger = get_logger() manifest = None @@ -435,7 +469,7 @@ def load_and_update(tests_root, # type: bytes def write(manifest, manifest_path): - # type: (Manifest, bytes) -> None + # type: (Manifest, Text) -> None dir_name = os.path.dirname(manifest_path) if not os.path.exists(dir_name): os.makedirs(dir_name) diff --git a/tools/manifest/sourcefile.py b/tools/manifest/sourcefile.py index 524eb0bddb255f..09316bd052c066 100644 --- a/tools/manifest/sourcefile.py +++ b/tools/manifest/sourcefile.py @@ -2,7 +2,8 @@ import re import os from collections import deque -from six import binary_type, ensure_text, iteritems, text_type +from io import BytesIO +from six import binary_type, iteritems, text_type from six.moves.urllib.parse import urljoin from fnmatch import fnmatch @@ -10,7 +11,6 @@ if MYPY: # MYPY is set to True when run under Mypy. from typing import Any - from typing import AnyStr from typing import BinaryIO from typing import Callable from typing import Deque @@ -43,7 +43,7 @@ TestharnessTest, VisualTest, WebDriverSpecTest) -from .utils import ContextManagerBytesIO, cached_property +from .utils import cached_property wd_pattern = "*.py" js_meta_re = re.compile(br"//\s*META:\s*(\w*)=(.*)$") @@ -185,22 +185,22 @@ def _parse_xml(f): class SourceFile(object): - parsers = {"html":_parse_html, - "xhtml":_parse_xml, - "svg":_parse_xml} # type: Dict[Text, Callable[[BinaryIO], ElementTree.ElementTree]] + parsers = {u"html":_parse_html, + u"xhtml":_parse_xml, + u"svg":_parse_xml} # type: Dict[Text, Callable[[BinaryIO], ElementTree.ElementTree]] - root_dir_non_test = {"common"} + root_dir_non_test = {u"common"} - dir_non_test = {"resources", - "support", - "tools"} + dir_non_test = {u"resources", + u"support", + u"tools"} - dir_path_non_test = {("css21", "archive"), - ("css", "CSS2", "archive"), - ("css", "common")} # type: Set[Tuple[bytes, ...]] + dir_path_non_test = {(u"css21", u"archive"), + (u"css", u"CSS2", u"archive"), + (u"css", u"common")} # type: 
Set[Tuple[Text, ...]] - def __init__(self, tests_root, rel_path_str, url_base, hash=None, contents=None): - # type: (AnyStr, AnyStr, Text, Optional[Text], Optional[bytes]) -> None + def __init__(self, tests_root, rel_path, url_base, hash=None, contents=None): + # type: (Text, Text, Text, Optional[Text], Optional[bytes]) -> None """Object representing a file in a source tree. :param tests_root: Path to the root of the source tree @@ -209,7 +209,6 @@ def __init__(self, tests_root, rel_path_str, url_base, hash=None, contents=None) :param contents: Byte array of the contents of the file or ``None``. """ - rel_path = ensure_text(rel_path_str) assert not os.path.isabs(rel_path), rel_path if os.name == "nt": # do slash normalization on Windows @@ -224,7 +223,7 @@ def __init__(self, tests_root, rel_path_str, url_base, hash=None, contents=None) meta_flags = name.split(".")[1:] - self.tests_root = ensure_text(tests_root) # type: Text + self.tests_root = tests_root # type: Text self.rel_path = rel_path # type: Text self.dir_path = dir_path # type: Text self.filename = filename # type: Text @@ -249,7 +248,7 @@ def __getstate__(self): return rv def name_prefix(self, prefix): - # type: (bytes) -> bool + # type: (Text) -> bool """Check if the filename starts with a given prefix :param prefix: The prefix to check""" @@ -270,13 +269,8 @@ def open(self): * the contents specified in the constructor, if any; * a File object opened for reading the file contents. 
""" - if self.contents is not None: - wrapped = ContextManagerBytesIO(self.contents) - if MYPY: - file_obj = cast(BinaryIO, wrapped) - else: - file_obj = wrapped + file_obj = BytesIO(self.contents) # type: BinaryIO else: file_obj = open(self.path, 'rb') return file_obj @@ -337,11 +331,11 @@ def name_is_non_test(self): """Check if the file name matches the conditions for the file to be a non-test file""" return (self.is_dir() or - self.name_prefix("MANIFEST") or - self.filename == "META.yml" or - self.filename.startswith(".") or - self.filename.endswith(".headers") or - self.filename.endswith(".ini") or + self.name_prefix(u"MANIFEST") or + self.filename == u"META.yml" or + self.filename.startswith(u".") or + self.filename.endswith(u".headers") or + self.filename.endswith(u".ini") or self.in_non_test_dir()) @property @@ -441,14 +435,14 @@ def markup_type(self): if not ext: return None - if ext[0] == ".": + if ext[0] == u".": ext = ext[1:] - if ext in ["html", "htm"]: - return "html" - if ext in ["xhtml", "xht", "xml"]: - return "xhtml" - if ext == "svg": - return "svg" + if ext in [u"html", u"htm"]: + return u"html" + if ext in [u"xhtml", u"xht", u"xml"]: + return u"xhtml" + if ext == u"svg": + return u"svg" return None @cached_property diff --git a/tools/manifest/utils.py b/tools/manifest/utils.py index aefc2c94f40e1a..36c1a983101a8e 100644 --- a/tools/manifest/utils.py +++ b/tools/manifest/utils.py @@ -2,16 +2,12 @@ import platform import subprocess -from six import BytesIO - MYPY = False if MYPY: # MYPY is set to True when run under Mypy. 
from typing import Text from typing import Callable - from typing import AnyStr from typing import Any - from typing import BinaryIO from typing import Generic from typing import TypeVar from typing import Optional @@ -22,72 +18,62 @@ T = object() Generic[T] = object + def rel_path_to_url(rel_path, url_base="/"): - # type: (bytes, Text) -> Text + # type: (Text, Text) -> Text assert not os.path.isabs(rel_path), rel_path - if url_base[0] != "/": - url_base = "/" + url_base - if url_base[-1] != "/": - url_base += "/" - return url_base + rel_path.replace(os.sep, "/") + if url_base[0] != u"/": + url_base = u"/" + url_base + if url_base[-1] != u"/": + url_base += u"/" + return url_base + rel_path.replace(os.sep, u"/") def from_os_path(path): - # type: (AnyStr) -> AnyStr - assert os.path.sep == "/" or platform.system() == "Windows" - if "/" == os.path.sep: + # type: (Text) -> Text + assert os.path.sep == u"/" or platform.system() == "Windows" + if u"/" == os.path.sep: rv = path else: - rv = path.replace(os.path.sep, "/") - if "\\" in rv: + rv = path.replace(os.path.sep, u"/") + if u"\\" in rv: raise ValueError("path contains \\ when separator is %s" % os.path.sep) return rv def to_os_path(path): - # type: (AnyStr) -> AnyStr - assert os.path.sep == "/" or platform.system() == "Windows" - if "\\" in path: + # type: (Text) -> Text + assert os.path.sep == u"/" or platform.system() == "Windows" + if u"\\" in path: raise ValueError("normalised path contains \\") - if "/" == os.path.sep: + if u"/" == os.path.sep: return path - return path.replace("/", os.path.sep) + return path.replace(u"/", os.path.sep) def git(path): - # type: (bytes) -> Optional[Callable[..., Text]] + # type: (Text) -> Optional[Callable[..., Text]] def gitfunc(cmd, *args): - # type: (bytes, *bytes) -> Text - full_cmd = ["git", cmd] + list(args) + # type: (Text, *Text) -> Text + full_cmd = [u"git", cmd] + list(args) try: return subprocess.check_output(full_cmd, cwd=path, 
stderr=subprocess.STDOUT).decode('utf8') except Exception as e: if platform.uname()[0] == "Windows" and isinstance(e, WindowsError): - full_cmd[0] = "git.bat" + full_cmd[0] = u"git.bat" return subprocess.check_output(full_cmd, cwd=path, stderr=subprocess.STDOUT).decode('utf8') else: raise try: # this needs to be a command that fails if we aren't in a git repo - gitfunc("rev-parse", "--show-toplevel") + gitfunc(u"rev-parse", u"--show-toplevel") except (subprocess.CalledProcessError, OSError): return None else: return gitfunc -class ContextManagerBytesIO(BytesIO): # type: ignore - def __enter__(self): - # type: () -> BinaryIO - return self # type: ignore - - def __exit__(self, *args, **kwargs): - # type: (*Any, **Any) -> bool - self.close() - return True - - class cached_property(Generic[T]): def __init__(self, func): # type: (Callable[[Any], T]) -> None diff --git a/tools/manifest/vcs.py b/tools/manifest/vcs.py index d36fe570f07347..80c0512807a959 100644 --- a/tools/manifest/vcs.py +++ b/tools/manifest/vcs.py @@ -7,7 +7,6 @@ from six import with_metaclass, PY2 -from .sourcefile import SourceFile from .utils import git try: @@ -20,7 +19,7 @@ MYPY = False if MYPY: # MYPY is set to True when run under Mypy. 
- from typing import Dict, Optional, List, Set, Text, Iterable, Any, Tuple, Union, Iterator + from typing import Dict, Optional, List, Set, Text, Iterable, Any, Tuple, Iterator from .manifest import Manifest # cyclic import under MYPY guard if PY2: stat_result = Any @@ -30,10 +29,10 @@ def get_tree(tests_root, manifest, manifest_path, cache_root, working_copy=True, rebuild=False): - # type: (bytes, Manifest, Optional[bytes], Optional[bytes], bool, bool) -> FileSystem + # type: (Text, Manifest, Optional[Text], Optional[Text], bool, bool) -> FileSystem tree = None if cache_root is None: - cache_root = os.path.join(tests_root, ".wptcache") + cache_root = os.path.join(tests_root, u".wptcache") if not os.path.exists(cache_root): try: os.makedirs(cache_root) @@ -54,7 +53,7 @@ def get_tree(tests_root, manifest, manifest_path, cache_root, class GitHasher(object): def __init__(self, path): - # type: (bytes) -> None + # type: (Text) -> None self.git = git(path) def _local_changes(self): @@ -90,32 +89,34 @@ def hash_cache(self): class FileSystem(object): - def __init__(self, root, url_base, cache_path, manifest_path=None, rebuild=False): - # type: (bytes, Text, Optional[bytes], Optional[bytes], bool) -> None - self.root = os.path.abspath(root) + def __init__(self, tests_root, url_base, cache_path, manifest_path=None, rebuild=False): + # type: (Text, Text, Optional[Text], Optional[Text], bool) -> None + self.tests_root = tests_root self.url_base = url_base self.ignore_cache = None self.mtime_cache = None + tests_root_bytes = tests_root.encode("utf8") if cache_path is not None: if manifest_path is not None: - self.mtime_cache = MtimeCache(cache_path, root, manifest_path, rebuild) - if gitignore.has_ignore(root): - self.ignore_cache = GitIgnoreCache(cache_path, root, rebuild) - self.path_filter = gitignore.PathFilter(self.root, - extras=[".git/"], + self.mtime_cache = MtimeCache(cache_path, tests_root, manifest_path, rebuild) + if gitignore.has_ignore(tests_root_bytes): + 
self.ignore_cache = GitIgnoreCache(cache_path, tests_root, rebuild) + self.path_filter = gitignore.PathFilter(tests_root_bytes, + extras=[b".git/"], cache=self.ignore_cache) - git = GitHasher(root) + git = GitHasher(tests_root) if git is not None: self.hash_cache = git.hash_cache() else: self.hash_cache = {} def __iter__(self): - # type: () -> Iterator[Tuple[bytes, Optional[bytes], bool]] + # type: () -> Iterator[Tuple[Text, Optional[Text], bool]] mtime_cache = self.mtime_cache - for dirpath, dirnames, filenames in self.path_filter(walk(self.root)): + for dirpath, dirnames, filenames in self.path_filter( + walk(self.tests_root.encode("utf8"))): for filename, path_stat in filenames: - path = os.path.join(dirpath, filename) + path = os.path.join(dirpath, filename).decode("utf8") if mtime_cache is None or mtime_cache.updated(path, path_stat): file_hash = self.hash_cache.get(path, None) yield path, file_hash, True @@ -131,7 +132,7 @@ def dump_caches(self): class CacheFile(with_metaclass(abc.ABCMeta)): def __init__(self, cache_root, tests_root, rebuild=False): - # type: (bytes, bytes, bool) -> None + # type: (Text, Text, bool) -> None self.tests_root = tests_root if not os.path.exists(cache_root): os.makedirs(cache_root) @@ -141,7 +142,7 @@ def __init__(self, cache_root, tests_root, rebuild=False): @abc.abstractproperty def file_name(self): - # type: () -> bytes + # type: () -> Text pass def dump(self): @@ -152,8 +153,8 @@ def dump(self): json.dump(self.data, f, indent=1) def load(self, rebuild=False): - # type: (bool) -> Dict[Any, Any] - data = {} # type: Dict[Any, Any] + # type: (bool) -> Dict[Text, Any] + data = {} # type: Dict[Text, Any] try: if not rebuild: with open(self.path, 'r') as f: @@ -167,22 +168,22 @@ def load(self, rebuild=False): return data def check_valid(self, data): - # type: (Dict[Any, Any]) -> Dict[Any, Any] + # type: (Dict[Text, Any]) -> Dict[Text, Any] """Check if the cached data is valid and return an updated copy of the cache containing only 
data that can be used.""" return data class MtimeCache(CacheFile): - file_name = "mtime.json" + file_name = u"mtime.json" def __init__(self, cache_root, tests_root, manifest_path, rebuild=False): - # type: (bytes, bytes, bytes, bool) -> None + # type: (Text, Text, Text, bool) -> None self.manifest_path = manifest_path super(MtimeCache, self).__init__(cache_root, tests_root, rebuild) def updated(self, rel_path, stat): - # type: (bytes, stat_result) -> bool + # type: (Text, stat_result) -> bool """Return a boolean indicating whether the file changed since the cache was last updated. This implicitly updates the cache with the new mtime data.""" @@ -195,12 +196,12 @@ def updated(self, rel_path, stat): def check_valid(self, data): # type: (Dict[Any, Any]) -> Dict[Any, Any] - if data.get("/tests_root") != self.tests_root: + if data.get(u"/tests_root") != self.tests_root: self.modified = True else: if self.manifest_path is not None and os.path.exists(self.manifest_path): mtime = os.path.getmtime(self.manifest_path) - if data.get("/manifest_path") != [self.manifest_path, mtime]: + if data.get(u"/manifest_path") != [self.manifest_path, mtime]: self.modified = True else: self.modified = True @@ -228,10 +229,10 @@ def check_valid(self, data): # type: (Dict[Any, Any]) -> Dict[Any, Any] ignore_path = os.path.join(self.tests_root, ".gitignore") mtime = os.path.getmtime(ignore_path) - if data.get("/gitignore_file") != [ignore_path, mtime]: + if data.get(u"/gitignore_file") != [ignore_path, mtime]: self.modified = True data = {} - data["/gitignore_file"] = [ignore_path, mtime] + data[u"/gitignore_file"] = [ignore_path, mtime] return data def __contains__(self, key): @@ -239,23 +240,23 @@ def __contains__(self, key): return key in self.data def __getitem__(self, key): - # type: (bytes) -> bool + # type: (Text) -> bool v = self.data[key] assert isinstance(v, bool) return v def __setitem__(self, key, value): - # type: (bytes, bool) -> None + # type: (Text, bool) -> None if 
self.data.get(key) != value: self.modified = True self.data[key] = value def __delitem__(self, key): - # type: (bytes) -> None + # type: (Text) -> None del self.data[key] def __iter__(self): - # type: () -> Iterator[bytes] + # type: () -> Iterator[Text] return iter(self.data) def __len__(self): @@ -286,7 +287,7 @@ def walk(root): relpath = os.path.relpath root = os.path.abspath(root) - stack = deque([(root, "")]) + stack = deque([(root, b"")]) while stack: dir_path, rel_path = stack.popleft() diff --git a/tools/tox.ini b/tools/tox.ini index bbec1afbb6dd2f..45358c081efbb0 100644 --- a/tools/tox.ini +++ b/tools/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py27,py35,py36,py37,py38,{py27,py35,py36,py37,py38}-flake8,{py35,py36,py37,py38}-mypy +envlist = py27,py35,py36,py37,py38,{py27,py35,py36,py37,py38}-flake8,py38-mypy2,{py35,py38}-mypy3 skipsdist=True skip_missing_interpreters = False @@ -42,26 +42,20 @@ commands = flake8 --append-config={toxinidir}/py3-flake8.ini {posargs} deps = -rrequirements_flake8.txt commands = flake8 --append-config={toxinidir}/py3-flake8.ini {posargs} -[testenv:py35-mypy] +[testenv:py38-mypy2] deps = -rrequirements_mypy.txt changedir = {toxinidir}/.. commands = mypy --config-file={toxinidir}/mypy.ini --no-incremental --py2 -p tools.manifest -p tools.lint -p tools.gitignore -[testenv:py36-mypy] +[testenv:py35-mypy3] deps = -rrequirements_mypy.txt changedir = {toxinidir}/.. commands = - mypy --config-file={toxinidir}/mypy.ini --no-incremental --py2 -p tools.manifest -p tools.lint -p tools.gitignore + mypy --config-file={toxinidir}/mypy.ini --no-incremental -p tools.manifest -p tools.lint -p tools.gitignore -[testenv:py37-mypy] +[testenv:py38-mypy3] deps = -rrequirements_mypy.txt changedir = {toxinidir}/.. commands = - mypy --config-file={toxinidir}/mypy.ini --no-incremental --py2 -p tools.manifest -p tools.lint -p tools.gitignore - -[testenv:py38-mypy] -deps = -rrequirements_mypy.txt -changedir = {toxinidir}/.. 
-commands = - mypy --config-file={toxinidir}/mypy.ini --no-incremental --py2 -p tools.manifest -p tools.lint -p tools.gitignore + mypy --config-file={toxinidir}/mypy.ini --no-incremental -p tools.manifest -p tools.lint -p tools.gitignore diff --git a/tools/wpt/revlist.py b/tools/wpt/revlist.py index 1893fdefa8346e..bd85612e2c2a7c 100644 --- a/tools/wpt/revlist.py +++ b/tools/wpt/revlist.py @@ -33,19 +33,22 @@ def parse_epoch(string): def get_tagged_revisions(pattern): - # type: (bytes) -> List[..., Dict] + # type: (Text) -> List[..., Dict] ''' Returns the tagged revisions indexed by the committer date. ''' git = get_git_cmd(wpt_root) args = [ pattern, - '--sort=-committerdate', - '--format=%(refname:lstrip=2) %(objectname) %(committerdate:raw)', - '--count=100000' + u'--sort=-committerdate', + u'--format=%(refname:lstrip=2) %(objectname) %(committerdate:raw)', + u'--count=100000' ] - for line in git("for-each-ref", *args).splitlines(): - tag, commit, date, _ = line.split(" ") + ref_list = git(u"for-each-ref", *args) + for line in ref_list.splitlines(): + if not line: + continue + tag, commit, date, _ = line.split(u" ") date = int(date) yield tag, commit, date @@ -81,7 +84,7 @@ def get_epoch_revisions(epoch, until, max_count): # Expected result: N,M,K,J,H,G,F,C,A cutoff_date = calculate_cutoff_date(until, epoch, epoch_offset) - for _, commit, date in get_tagged_revisions("refs/tags/merge_pr_*"): + for _, commit, date in get_tagged_revisions(u"refs/tags/merge_pr_*"): if count >= max_count: return if date < cutoff_date: diff --git a/tools/wpt/testfiles.py b/tools/wpt/testfiles.py index 47a02844edc337..c990aa21a65f6f 100644 --- a/tools/wpt/testfiles.py +++ b/tools/wpt/testfiles.py @@ -2,15 +2,14 @@ import logging import os import re -import subprocess import sys -import six from collections import OrderedDict -from six import iteritems +from six import ensure_text, ensure_str, iteritems try: from ..manifest import manifest + from ..manifest.utils import git as 
get_git_cmd except ValueError: # if we're not within the tools package, the above is an import from above # the top-level which raises ValueError, so reimport it with an absolute @@ -20,12 +19,12 @@ # paths set up correctly to handle both and MYPY has no knowledge of our # sys.path magic from manifest import manifest # type: ignore + from manifest.utils import git as get_git_cmd # type: ignore MYPY = False if MYPY: # MYPY is set to True when run under Mypy. from typing import Any - from typing import Callable from typing import Dict from typing import Iterable from typing import List @@ -35,30 +34,13 @@ from typing import Set from typing import Text from typing import Tuple - from typing import Union -here = os.path.dirname(__file__) +here = ensure_text(os.path.dirname(__file__)) wpt_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir)) logger = logging.getLogger() -def get_git_cmd(repo_path): - # type: (bytes) -> Callable[..., Text] - """Create a function for invoking git commands as a subprocess.""" - def git(cmd, *args): - # type: (Text, *Union[bytes, Text]) -> Text - full_cmd = [u"git", cmd] + list(item.decode("utf8") if isinstance(item, bytes) else item for item in args) # type: List[Text] - try: - logger.debug(" ".join(full_cmd)) - return subprocess.check_output(full_cmd, cwd=repo_path).decode("utf8").strip() - except subprocess.CalledProcessError as e: - logger.critical("Git command exited with status %i" % e.returncode) - logger.critical(e.output) - sys.exit(1) - return git - - def display_branch_point(): # type: () -> None print(branch_point()) @@ -66,6 +48,10 @@ def display_branch_point(): def branch_point(): # type: () -> Optional[Text] + git = get_git_cmd(wpt_root) + if git is None: + raise Exception("git not found") + if (os.environ.get("GITHUB_PULL_REQUEST", "false") == "false" and os.environ.get("GITHUB_BRANCH") == "master"): # For builds on the master branch just return the HEAD commit @@ -85,7 +71,7 @@ def branch_point(): # get 
everything in refs/heads and refs/remotes that doesn't include HEAD not_heads = [item for item in git("rev-parse", "--not", "--branches", "--remotes").split("\n") - if item != "^%s" % head] + if item and item != "^%s" % head] # get all commits on HEAD but not reachable from anything in not_heads commits = git("rev-list", "--topo-order", "--parents", "HEAD", *not_heads) @@ -129,28 +115,33 @@ def branch_point(): logger.debug("Using first commit on another branch as the branch point") logger.debug("Branch point from master: %s" % branch_point) + if branch_point: + branch_point = branch_point.strip() return branch_point def compile_ignore_rule(rule): - # type: (str) -> Pattern[str] - rule = rule.replace(os.path.sep, "/") - parts = rule.split("/") + # type: (Text) -> Pattern[Text] + rule = rule.replace(ensure_text(os.path.sep), u"/") + parts = rule.split(u"/") re_parts = [] for part in parts: - if part.endswith("**"): - re_parts.append(re.escape(part[:-2]) + ".*") - elif part.endswith("*"): - re_parts.append(re.escape(part[:-1]) + "[^/]*") + if part.endswith(u"**"): + re_parts.append(re.escape(part[:-2]) + u".*") + elif part.endswith(u"*"): + re_parts.append(re.escape(part[:-1]) + u"[^/]*") else: re_parts.append(re.escape(part)) - return re.compile("^%s$" % "/".join(re_parts)) + return re.compile(u"^%s$" % u"/".join(re_parts)) def repo_files_changed(revish, include_uncommitted=False, include_new=False): - # type: (str, bool, bool) -> Set[Text] + # type: (Text, bool, bool) -> Set[Text] git = get_git_cmd(wpt_root) - files_list = git("diff", "--name-only", "-z", revish).split("\0") + if git is None: + raise Exception("git not found") + + files_list = git("diff", "--name-only", "-z", revish).split(u"\0") assert not files_list[-1] files = set(files_list[:-1]) @@ -174,7 +165,7 @@ def repo_files_changed(revish, include_uncommitted=False, include_new=False): def exclude_ignored(files, ignore_rules): - # type: (Iterable[Text], Optional[Sequence[str]]) -> Tuple[List[Text], 
List[Text]] + # type: (Iterable[Text], Optional[Sequence[Text]]) -> Tuple[List[Text], List[Text]] if ignore_rules is None: ignore_rules = [] compiled_ignore_rules = [compile_ignore_rule(item) for item in ignore_rules] @@ -194,8 +185,8 @@ def exclude_ignored(files, ignore_rules): return changed, ignored -def files_changed(revish, # type: str - ignore_rules=None, # type: Optional[Sequence[str]] +def files_changed(revish, # type: Text + ignore_rules=None, # type: Optional[Sequence[Text]] include_uncommitted=False, # type: bool include_new=False # type: bool ): @@ -216,29 +207,29 @@ def files_changed(revish, # type: str def _in_repo_root(full_path): - # type: (Union[bytes, Text]) -> bool + # type: (Text) -> bool rel_path = os.path.relpath(full_path, wpt_root) path_components = rel_path.split(os.sep) return len(path_components) < 2 def load_manifest(manifest_path=None, manifest_update=True): - # type: (Optional[str], bool) -> manifest.Manifest + # type: (Optional[Text], bool) -> manifest.Manifest if manifest_path is None: - manifest_path = os.path.join(wpt_root, "MANIFEST.json") + manifest_path = os.path.join(wpt_root, u"MANIFEST.json") return manifest.load_and_update(wpt_root, manifest_path, "/", update=manifest_update) def affected_testfiles(files_changed, # type: Iterable[Text] - skip_dirs=None, # type: Optional[Set[str]] - manifest_path=None, # type: Optional[str] + skip_dirs=None, # type: Optional[Set[Text]] + manifest_path=None, # type: Optional[Text] manifest_update=True # type: bool ): - # type: (...) -> Tuple[Set[Text], Set[str]] + # type: (...) -> Tuple[Set[Text], Set[Text]] """Determine and return list of test files that reference changed files.""" if skip_dirs is None: - skip_dirs = {"conformance-checkers", "docs", "tools"} + skip_dirs = {u"conformance-checkers", u"docs", u"tools"} affected_testfiles = set() # Exclude files that are in the repo root, because # they are not part of any test. 
@@ -281,7 +272,7 @@ def affected_testfiles(files_changed, # type: Iterable[Text] for interface in interfaces_changed] def affected_by_wdspec(test): - # type: (str) -> bool + # type: (Text) -> bool affected = False if test in wdspec_test_files: for support_full_path, _ in nontest_changed_paths: @@ -297,7 +288,7 @@ def affected_by_wdspec(test): return affected def affected_by_interfaces(file_contents): - # type: (Union[bytes, Text]) -> bool + # type: (Text) -> bool if len(interfaces_changed_names) > 0: if 'idlharness.js' in file_contents: for interface in interfaces_changed_names: @@ -375,27 +366,26 @@ def get_parser_affected(): def get_revish(**kwargs): - # type: (**Any) -> bytes + # type: (**Any) -> Text revish = kwargs.get("revish") if revish is None: - revish = "%s..HEAD" % branch_point() - if isinstance(revish, six.text_type): - revish = revish.encode("utf8") - assert isinstance(revish, six.binary_type) - return revish + revish = u"%s..HEAD" % branch_point() + return ensure_text(revish).strip() def run_changed_files(**kwargs): # type: (**Any) -> None revish = get_revish(**kwargs) - changed, _ = files_changed(revish, kwargs["ignore_rules"], + changed, _ = files_changed(revish, + kwargs["ignore_rules"], include_uncommitted=kwargs["modified"], include_new=kwargs["new"]) - separator = "\0" if kwargs["null"] else "\n" + separator = u"\0" if kwargs["null"] else u"\n" for item in sorted(changed): - sys.stdout.write(os.path.relpath(six.ensure_str(item), wpt_root) + separator) + line = os.path.relpath(item, wpt_root) + separator + sys.stdout.write(ensure_str(line)) def run_tests_affected(**kwargs): diff --git a/tools/wpt/tests/test_testfiles.py b/tools/wpt/tests/test_testfiles.py index 9fd6c6f9b85153..81f528457c6d68 100644 --- a/tools/wpt/tests/test_testfiles.py +++ b/tools/wpt/tests/test_testfiles.py @@ -6,13 +6,13 @@ def test_getrevish_kwarg(): - assert testfiles.get_revish(revish=u"abcdef") == b"abcdef" - assert testfiles.get_revish(revish=b"abcdef") == b"abcdef" + 
assert testfiles.get_revish(revish=u"abcdef") == u"abcdef" + assert testfiles.get_revish(revish=b"abcdef") == u"abcdef" def test_getrevish_implicit(): with patch("tools.wpt.testfiles.branch_point", return_value=u"base"): - assert testfiles.get_revish() == b"base..HEAD" + assert testfiles.get_revish() == u"base..HEAD" def test_affected_testfiles(): diff --git a/tools/wpt/utils.py b/tools/wpt/utils.py index 9a6c0646712064..18551481ab8cae 100644 --- a/tools/wpt/utils.py +++ b/tools/wpt/utils.py @@ -5,15 +5,17 @@ import zipfile from io import BytesIO -try: - from typing import Any, Callable -except ImportError: - pass +MYPY = False +if MYPY: + from typing import Any + from typing import Callable + from typing import Dict logger = logging.getLogger(__name__) class Kwargs(dict): + # type: Dict[Any, Any] def set_if_none(self, name, # type: str value, # type: Any diff --git a/tools/wptrunner/wptrunner/tests/test_update.py b/tools/wptrunner/wptrunner/tests/test_update.py index 6dc04cedf34c7a..a1ba8de2878064 100644 --- a/tools/wptrunner/wptrunner/tests/test_update.py +++ b/tools/wptrunner/wptrunner/tests/test_update.py @@ -29,6 +29,19 @@ def SourceFileWithTest(path, hash, cls, *args): return s +def tree_and_sourcefile_mocks(source_files): + paths_dict = {} + tree = [] + for source_file, file_hash, updated in source_files: + paths_dict[source_file.rel_path] = source_file + tree.append([source_file.rel_path, file_hash, updated]) + + def MockSourceFile(tests_root, path, url_base, file_hash): + return paths_dict[path] + + return tree, MockSourceFile + + item_classes = {"testharness": manifest_item.TestharnessTest, "reftest": manifest_item.RefTest, "manual": manifest_item.ManualTest, @@ -129,9 +142,11 @@ def create_test_manifest(tests, url_base="/"): source_files = [] for i, (test, _, test_type, _) in enumerate(tests): if test_type: - source_files.append((SourceFileWithTest(test, str(i) * 40, item_classes[test_type]), True)) - m = manifest.Manifest() - m.update(source_files) + 
source_files.append(SourceFileWithTest(test, str(i) * 40, item_classes[test_type])) + m = manifest.Manifest("") + tree, sourcefile_mock = tree_and_sourcefile_mocks((item, None, True) for item in source_files) + with mock.patch("manifest.manifest.SourceFile", side_effect=sourcefile_mock): + m.update(tree) return m diff --git a/tools/wptrunner/wptrunner/tests/test_wpttest.py b/tools/wptrunner/wptrunner/tests/test_wpttest.py index dbccd90e339580..4eee9ccb1b6993 100644 --- a/tools/wptrunner/wptrunner/tests/test_wpttest.py +++ b/tools/wptrunner/wptrunner/tests/test_wpttest.py @@ -1,11 +1,13 @@ +import mock from io import BytesIO -from mock import Mock from manifest import manifest as wptmanifest from manifest.item import TestharnessTest, RefTest from manifest.utils import to_os_path +from . test_update import tree_and_sourcefile_mocks from .. import manifestexpected, wpttest + dir_ini_0 = b"""\ prefs: [a:b] """ @@ -83,7 +85,7 @@ def make_mock_manifest(*items): - rv = Mock(tests_root="/foobar") + rv = mock.Mock(tests_root="/foobar") tests = [] rv.__iter__ = lambda self: iter(tests) rv.__getitem__ = lambda self, k: tests[k] @@ -204,13 +206,15 @@ def test_metadata_fuzzy(): references=[["/a/fuzzy-ref.html", "=="]], fuzzy=[[["/a/fuzzy.html", '/a/fuzzy-ref.html', '=='], [[2, 3], [10, 15]]]]) - s = Mock(rel_path="a/fuzzy.html", rel_path_parts=("a", "fuzzy.html"), hash="0"*40) - s.manifest_items = Mock(return_value=(item.item_type, [item])) + s = mock.Mock(rel_path="a/fuzzy.html", rel_path_parts=("a", "fuzzy.html"), hash="0"*40) + s.manifest_items = mock.Mock(return_value=(item.item_type, [item])) - manifest = wptmanifest.Manifest() + manifest = wptmanifest.Manifest("") - assert manifest.update([(s, True)]) is True + tree, sourcefile_mock = tree_and_sourcefile_mocks([(s, None, True)]) + with mock.patch("manifest.manifest.SourceFile", side_effect=sourcefile_mock): + assert manifest.update(tree) is True test_metadata = manifestexpected.static.compile(BytesIO(test_fuzzy), {},