Skip to content

Commit

Permalink
Merge pull request #378 from kyleam/venv-pyc
Browse files Browse the repository at this point in the history
retrace: Map pyc files back to their source
  • Loading branch information
yarikoptic committed Mar 7, 2019
2 parents fd7e202 + 61c81e1 commit 17af512
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 9 deletions.
31 changes: 29 additions & 2 deletions reproman/distributions/tests/test_venv.py
Expand Up @@ -60,6 +60,7 @@ def venv_test_dir():
def test_venv_identify_distributions(venv_test_dir):
libpaths = {p[-1]: os.path.join("lib", PY_VERSION, *p)
for p in [("abc.py",),
("importlib", "yaml", "machinery.py"),
("site-packages", "yaml", "parser.py"),
("site-packages", "attr", "filters.py")]}

Expand All @@ -71,8 +72,10 @@ def test_venv_identify_distributions(venv_test_dir):
os.path.join("venv1", libpaths["filters.py"]),
# A virtualenv file that isn't part of any particular package.
os.path.join("venv1", "bin", "python"),
# A link to the outside world.
os.path.join("venv1", libpaths["abc.py"])
# A link to the outside world ...
os.path.join("venv1", libpaths["abc.py"]),
# or in a directory that is a link to the outside world.
os.path.join("venv1", libpaths["machinery.py"])
]
path_args.append("/sbin/iptables")

Expand All @@ -88,6 +91,7 @@ def test_venv_identify_distributions(venv_test_dir):
assert unknown_files == {
"/sbin/iptables",
op.realpath(os.path.join("venv1", libpaths["abc.py"])),
op.realpath(os.path.join("venv1", libpaths["machinery.py"])),
# The editable package was added by VenvTracer as an unknown file.
os.path.join(venv_test_dir, "minimal_pymodule")}

Expand Down Expand Up @@ -151,6 +155,29 @@ def test_venv_install(venv_test_dir, tmpdir):
for p in e.packages])


@pytest.mark.integration
def test_venv_pyc(venv_test_dir, tmpdir):
    """Retracing a virtualenv .pyc file should report its source's package.

    The test is skipped when the expected cache file for the running
    interpreter version is not present in the test virtualenv.
    """
    from reproman.api import retrace

    tmpdir = str(tmpdir)
    pkg_dir = op.join("lib", PY_VERSION, "site-packages", "attr")
    pyc_name = "exceptions.cpython-{v.major}{v.minor}.pyc".format(
        v=sys.version_info)
    pyc_file = op.join(
        venv_test_dir, "venv1", pkg_dir, "__pycache__", pyc_name)

    if not op.exists(pyc_file):
        pytest.skip("Expected file does not exist: {}".format(pyc_file))

    distributions, unknown_files = retrace([pyc_file])
    # The cache file must be attributed to a package, not left over.
    assert not unknown_files
    assert len(distributions) == 1

    # The reported file is the .py source, relative to the environment.
    expected_package = {"files": [op.join(pkg_dir, "exceptions.py")],
                        "name": "attrs",
                        "editable": False}
    expect = {"environments": [{"packages": [expected_package]}]}
    assert_is_subset_recur(
        expect, attr.asdict(distributions[0]), [dict, list])


def test_venv_install_noop():
dist = VenvDistribution(
name="venv",
Expand Down
11 changes: 5 additions & 6 deletions reproman/distributions/venv.py
Expand Up @@ -151,13 +151,12 @@ def identify_distributions(self, files):
pkg_to_found_files[file_to_pkg[fullpath]].append(
os.path.relpath(path, venv_path))

# Some files, like venvs/dev/lib/python2.7/abc.py could be
# symlinks populated by virtualenv itself during venv creation
# since it relies on system wide python environment. So we need
# to resolve those into filenames which could be associated with
# system wide installation of python
# Some virtualenv files are links to system files. Files themselves
# may be linked or they may be in a linked directory. We need to
# resolve these links and pass them out as unknown files for other
# tracers to use.
for path in unknown_files.copy():
if is_subpath(path, venv_path) and op.islink(path):
if is_subpath(path, venv_path):
rpath = op.realpath(path)
# ... but the resolved link may point to another path under
# the environment (e.g., bin/python -> bin/python3), and we
Expand Down
6 changes: 5 additions & 1 deletion reproman/interface/retrace.py
Expand Up @@ -25,6 +25,7 @@
from ..support.exceptions import InsufficientArgumentsError
from ..support.param import Parameter
from ..utils import assure_list
from ..utils import pycache_source
from ..utils import to_unicode
from ..resource import get_manager

Expand Down Expand Up @@ -98,6 +99,9 @@ def __call__(path=None, spec=None, output_file=None,

# Convert paths to unicode
paths = map(to_unicode, paths)
# If .pyc files come in (common for ReprozipProvenance), the tracers
# don't recognize them, so map each one back to its source file.
paths = (pycache_source(p) or p for p in paths)
# The tracers assume normalized paths.
paths = list(map(normpath, paths))

Expand Down Expand Up @@ -132,7 +136,7 @@ def __call__(path=None, spec=None, output_file=None,
RepromanProvenance.write(stream, spec)
if stream is not sys.stdout:
stream.close()

return distributions, files

# TODO: session should be with a state. Idea is that if we want
# to trace while inheriting all custom PATHs which that run might have
Expand Down
32 changes: 32 additions & 0 deletions reproman/tests/test_utils.py
Expand Up @@ -47,6 +47,7 @@
from ..utils import PathRoot, is_subpath
from ..utils import parse_semantic_version
from ..utils import merge_dicts
from ..utils import pycache_source

from .utils import ok_, eq_, assert_false, assert_equal, assert_true

Expand Down Expand Up @@ -560,6 +561,37 @@ def test_merge_dicts():
assert merge_dicts([{1: 1}, {2: 2}, {1: 3}]) == {1: 3, 2: 2}


@pytest.mark.parametrize(
    "case",
    [
        dict(label="full-py2",
             value="/tmp/a/b/c/d.pyc",
             expected="/tmp/a/b/c/d.py"),
        dict(label="full",
             value="/tmp/a/b/c/__pycache__/d.cpython-35.pyc",
             expected="/tmp/a/b/c/d.py"),
        dict(label="relative-py2",
             value="d.pyc",
             expected="d.py"),
        dict(label="relative-py2-pyo",
             value="d.pyo",
             expected="d.py"),
        dict(label="relative",
             value="__pycache__/d.cpython-35.pyc",
             expected="d.py"),
        dict(label="relative-pyo",
             value="__pycache__/d.cpython-35.opt-1.pyc",
             expected="d.py"),
        dict(label="not pyc",
             value="not a pycache",
             expected=None),
        dict(label="empty",
             value="",
             expected=None),
    ],
    ids=itemgetter("label"))
def test_pycache_source(case):
    """Check pycache_source against py2- and py3-style cache paths."""
    source = pycache_source(case["value"])
    assert source == case["expected"]


def test_line_profile():
pytest.importorskip("line_profiler")

Expand Down
31 changes: 31 additions & 0 deletions reproman/utils.py
Expand Up @@ -13,6 +13,7 @@
from shlex import quote as shlex_quote
import time

import os.path as op
from os.path import curdir, basename, exists, realpath, islink, join as opj, isabs, normpath, expandvars, expanduser, abspath
from urllib.parse import quote as urlquote, unquote as urlunquote, urlsplit

Expand Down Expand Up @@ -1382,4 +1383,34 @@ def merge_dicts(ds):
return merged


def pycache_source(path):
    """Map a Python cache file path to the path of its source file.

    Parameters
    ----------
    path : str
        A Python cache file.  Both the py2 layout (``d.pyc`` or ``d.pyo``
        next to the source) and the py3 layout
        (``__pycache__/d.cpython-35.pyc``, optionally with an ``.opt-N``
        component) are handled.

    Returns
    -------
    Path of cached Python file (str) or None if `path` doesn't look like a
    cache file.
    """
    if not path.endswith((".pyc", ".pyo")):
        lgr.debug("Path does not look like a Python cache file: %s", path)
        return None

    leading, base = op.split(path)
    parent, cache_dir = op.split(leading)
    if cache_dir == "__pycache__":
        # py3-style path, e.g., "__pycache__/f.cpython-35.pyc" or
        # "__pycache__/f.cpython-35.opt-2.pyc".  The source lives one level
        # up, and its name is everything before the first dot.
        name = base.split(".", 1)[0]
        pyfile = op.join(parent, name + ".py")
    else:
        # py2-style: the cache file sits next to the source, so dropping the
        # trailing "c"/"o" is enough.  Only the immediate parent directory
        # is compared, so a "__pycache__" substring elsewhere in the path
        # (e.g. "/x/__pycache__old/f.pyc") does not trigger the py3 rule.
        pyfile = path[:-1]
    lgr.debug("Converted pycache file %s to source file %s",
              path, pyfile)
    return pyfile


lgr.log(5, "Done importing reproman.utils")

0 comments on commit 17af512

Please sign in to comment.