diff --git a/reproman/distributions/tests/test_venv.py b/reproman/distributions/tests/test_venv.py index 699511500..c1a7f3fa7 100644 --- a/reproman/distributions/tests/test_venv.py +++ b/reproman/distributions/tests/test_venv.py @@ -60,6 +60,7 @@ def venv_test_dir(): def test_venv_identify_distributions(venv_test_dir): libpaths = {p[-1]: os.path.join("lib", PY_VERSION, *p) for p in [("abc.py",), + ("importlib", "yaml", "machinery.py"), ("site-packages", "yaml", "parser.py"), ("site-packages", "attr", "filters.py")]} @@ -71,8 +72,10 @@ def test_venv_identify_distributions(venv_test_dir): os.path.join("venv1", libpaths["filters.py"]), # A virtualenv file that isn't part of any particular package. os.path.join("venv1", "bin", "python"), - # A link to the outside world. - os.path.join("venv1", libpaths["abc.py"]) + # A link to the outside world ... + os.path.join("venv1", libpaths["abc.py"]), + # or in a directory that is a link to the outside world. + os.path.join("venv1", libpaths["machinery.py"]) ] path_args.append("/sbin/iptables") @@ -88,6 +91,7 @@ def test_venv_identify_distributions(venv_test_dir): assert unknown_files == { "/sbin/iptables", op.realpath(os.path.join("venv1", libpaths["abc.py"])), + op.realpath(os.path.join("venv1", libpaths["machinery.py"])), # The editable package was added by VenvTracer as an unknown file. 
@pytest.mark.integration
def test_venv_pyc(venv_test_dir, tmpdir):
    """Check that retracing a virtualenv ``.pyc`` file finds its package.

    The path handed to ``retrace`` is the byte-compiled ``exceptions`` module
    of the ``attr`` package inside ``venv1``.  The trace is expected to report
    no unknown files and a single distribution whose ``attrs`` package lists
    the corresponding ``exceptions.py`` source file.

    The test is skipped when the expected cache file does not exist.
    """
    from reproman.api import retrace
    venv_path = op.join("lib", PY_VERSION, "site-packages", "attr")
    # The cache file name embeds the tag of the running interpreter,
    # e.g. "exceptions.cpython-37.pyc".
    pyc_name = "exceptions.cpython-{v.major}{v.minor}.pyc".format(
        v=sys.version_info)
    pyc_path = op.join(
        venv_test_dir, "venv1", venv_path, "__pycache__", pyc_name)

    if not op.exists(pyc_path):
        pytest.skip("Expected file does not exist: {}".format(pyc_path))

    distributions, unknown_files = retrace([pyc_path])
    assert not unknown_files
    assert len(distributions) == 1
    # The package must report the source file, not the cache file.
    expect = {"environments":
              [{"packages": [{"files": [op.join(venv_path, "exceptions.py")],
                              "name": "attrs",
                              "editable": False}]}]}
    assert_is_subset_recur(expect,
                           attr.asdict(distributions[0]), [dict, list])
but the resolved link may point to another path under # the environment (e.g., bin/python -> bin/python3), and we diff --git a/reproman/interface/retrace.py b/reproman/interface/retrace.py index 5e75ca32b..1f4019b6b 100644 --- a/reproman/interface/retrace.py +++ b/reproman/interface/retrace.py @@ -25,6 +25,7 @@ from ..support.exceptions import InsufficientArgumentsError from ..support.param import Parameter from ..utils import assure_list +from ..utils import pycache_source from ..utils import to_unicode from ..resource import get_manager @@ -98,6 +99,9 @@ def __call__(path=None, spec=None, output_file=None, # Convert paths to unicode paths = map(to_unicode, paths) + # If .pyc files come in (common for ReprozipProvenance), the tracers + # don't recognize them. + paths = (pycache_source(p) or p for p in paths) # The tracers assume normalized paths. paths = list(map(normpath, paths)) @@ -132,7 +136,7 @@ def __call__(path=None, spec=None, output_file=None, RepromanProvenance.write(stream, spec) if stream is not sys.stdout: stream.close() - + return distributions, files # TODO: session should be with a state. 
def pycache_source(path):
    """Map a pycache path to the original path.

    Two layouts are recognized:

    * Python 3 style, where the cache file sits in a ``__pycache__``
      directory next to the source and carries an interpreter tag, e.g.
      ``pkg/__pycache__/mod.cpython-35.pyc`` or
      ``pkg/__pycache__/mod.cpython-35.opt-2.pyc`` -> ``pkg/mod.py``.
    * Python 2 style, where the cache file sits beside the source, e.g.
      ``pkg/mod.pyc`` or ``pkg/mod.pyo`` -> ``pkg/mod.py``.

    Parameters
    ----------
    path : str
        A Python cache file.

    Returns
    -------
    Path of cached Python file (str) or None if `path` doesn't look like a
    cache file.
    """
    if not path.endswith((".pyc", ".pyo")):
        lgr.debug("Path does not look like a Python cache file: %s", path)
        return None

    leading, base = op.split(path)
    # Only the *immediate* parent directory decides the layout.  A substring
    # check on the whole path would misclassify a py2-style cache file that
    # merely lives somewhere below a "__pycache__" directory (or in a
    # directory whose name contains that text) and then cut the wrong
    # characters off the directory name.
    if op.basename(leading) == "__pycache__":
        # Everything after the first dot is the interpreter tag and the
        # optional optimization level.
        name = base.split(".", 1)[0]
        pyfile = op.join(op.dirname(leading), name + ".py")
    else:
        # py2: "mod.pyc" / "mod.pyo" -> "mod.py"
        pyfile = path[:-1]
    lgr.debug("Converted pycache file %s to source file %s",
              path, pyfile)
    return pyfile