Skip to content
Browse files

Merge pull request #378 from kyleam/venv-pyc

retrace: Map pyc files back to their source
  • Loading branch information...
yarikoptic committed Mar 7, 2019
2 parents fd7e202 + 61c81e1 commit 17af512a8e2d6f88f6276e43e4b4674a59bb1d98
@@ -60,6 +60,7 @@ def venv_test_dir():
def test_venv_identify_distributions(venv_test_dir):
libpaths = {p[-1]: os.path.join("lib", PY_VERSION, *p)
for p in [("",),
("importlib", "yaml", ""),
("site-packages", "yaml", ""),
("site-packages", "attr", "")]}

@@ -71,8 +72,10 @@ def test_venv_identify_distributions(venv_test_dir):
os.path.join("venv1", libpaths[""]),
# A virtualenv file that isn't part of any particular package.
os.path.join("venv1", "bin", "python"),
# A link to the outside world.
os.path.join("venv1", libpaths[""])
# A link to the outside world ...
os.path.join("venv1", libpaths[""]),
# or in a directory that is a link to the outside world.
os.path.join("venv1", libpaths[""])

@@ -88,6 +91,7 @@ def test_venv_identify_distributions(venv_test_dir):
assert unknown_files == {
op.realpath(os.path.join("venv1", libpaths[""])),
op.realpath(os.path.join("venv1", libpaths[""])),
# The editable package was added by VenvTracer as an unknown file.
os.path.join(venv_test_dir, "minimal_pymodule")}

@@ -151,6 +155,29 @@ def test_venv_install(venv_test_dir, tmpdir):
for p in e.packages])

def test_venv_pyc(venv_test_dir, tmpdir):
from reproman.api import retrace
tmpdir = str(tmpdir)
venv_path = op.join("lib", PY_VERSION, "site-packages", "attr")
pyc_path = op.join(
venv_test_dir, "venv1", venv_path, "__pycache__",

if not op.exists(pyc_path):
pytest.skip("Expected file does not exist: {}".format(pyc_path))

distributions, unknown_files = retrace([pyc_path])
assert not unknown_files
assert len(distributions) == 1
expect = {"environments":
[{"packages": [{"files": [op.join(venv_path, "")],
"name": "attrs",
"editable": False}]}]}
attr.asdict(distributions[0]), [dict, list])

def test_venv_install_noop():
dist = VenvDistribution(
@@ -151,13 +151,12 @@ def identify_distributions(self, files):
os.path.relpath(path, venv_path))

# Some files, like venvs/dev/lib/python2.7/ could be
# symlinks populated by virtualenv itself during venv creation
# since it relies on system wide python environment. So we need
# to resolve those into filenames which could be associated with
# system wide installation of python
# Some virtualenv files are links to system files. Files themselves
# may be linked or they may be in a linked directory. We need to
# resolve these links and pass them out as unknown files for other
# tracers to use.
for path in unknown_files.copy():
if is_subpath(path, venv_path) and op.islink(path):
if is_subpath(path, venv_path):
rpath = op.realpath(path)
# ... but the resolved link may point to another path under
# the environment (e.g., bin/python -> bin/python3), and we
@@ -25,6 +25,7 @@
from import InsufficientArgumentsError
from import Parameter
from ..utils import assure_list
from ..utils import pycache_source
from ..utils import to_unicode
from ..resource import get_manager

@@ -98,6 +99,9 @@ def __call__(path=None, spec=None, output_file=None,

# Convert paths to unicode
paths = map(to_unicode, paths)
# If .pyc files come in (common for ReprozipProvenance), the tracers
# don't recognize them.
paths = (pycache_source(p) or p for p in paths)
# The tracers assume normalized paths.
paths = list(map(normpath, paths))

@@ -132,7 +136,7 @@ def __call__(path=None, spec=None, output_file=None,
RepromanProvenance.write(stream, spec)
if stream is not sys.stdout:

return distributions, files

# TODO: session should be with a state. Idea is that if we want
# to trace while inheriting all custom PATHs which that run might have
@@ -47,6 +47,7 @@
from ..utils import PathRoot, is_subpath
from ..utils import parse_semantic_version
from ..utils import merge_dicts
from ..utils import pycache_source

from .utils import ok_, eq_, assert_false, assert_equal, assert_true

@@ -560,6 +561,37 @@ def test_merge_dicts():
assert merge_dicts([{1: 1}, {2: 2}, {1: 3}]) == {1: 3, 2: 2}

[{"label": "full-py2",
"value": "/tmp/a/b/c/d.pyc",
"expected": "/tmp/a/b/c/"},
{"label": "full",
"value": "/tmp/a/b/c/__pycache__/d.cpython-35.pyc",
"expected": "/tmp/a/b/c/"},
{"label": "relative-py2",
"value": "d.pyc",
"expected": ""},
{"label": "relative-py2-pyo",
"value": "d.pyo",
"expected": ""},
{"label": "relative",
"value": "__pycache__/d.cpython-35.pyc",
"expected": ""},
{"label": "relative-pyo",
"value": "__pycache__/d.cpython-35.opt-1.pyc",
"expected": ""},
{"label": "not pyc",
"value": "not a pycache",
"expected": None},
{"label": "empty",
"value": "",
"expected": None}],
def test_pycache_source(case):
assert pycache_source(case["value"]) == case["expected"]

def test_line_profile():

@@ -13,6 +13,7 @@
from shlex import quote as shlex_quote
import time

import os.path as op
from os.path import curdir, basename, exists, realpath, islink, join as opj, isabs, normpath, expandvars, expanduser, abspath
from urllib.parse import quote as urlquote, unquote as urlunquote, urlsplit

@@ -1382,4 +1383,34 @@ def merge_dicts(ds):
return merged

def pycache_source(path):
    """Map a pycache path to the original path.

    Both cache layouts are handled:

    * py2-style, where the cache file sits next to its source
      ("pkg/f.pyc" or "pkg/f.pyo" -> "pkg/f.py");
    * py3-style, where the cache file lives in a "__pycache__" directory
      and carries an interpreter tag ("pkg/__pycache__/f.cpython-35.pyc"
      or "pkg/__pycache__/f.cpython-35.opt-2.pyc" -> "pkg/f.py").

    The mapping is purely textual; the file system is never consulted.

    Parameters
    ----------
    path : str
        A Python cache file.

    Returns
    -------
    Path of cached Python file (str) or None if `path` doesn't look like a
    cache file.
    """
    if not path.endswith((".pyc", ".pyo")):
        lgr.debug("Path does not look like a Python cache file: %s", path)
        return None

    leading, base = op.split(path)
    # Decide on the layout from the *immediate parent directory* rather than
    # from a substring match on the whole path.  Otherwise a py2-style file
    # below some unrelated "__pycache__" component (or a directory whose name
    # merely contains that string) would get an arbitrary 11-character chunk
    # sliced off its parent directory.
    if op.basename(leading) != "__pycache__":  # py2
        # "f.pyc" / "f.pyo" -> "f.py"
        pyfile = path[:-1]
    else:
        # A py3-style path, e.g., "__pycache__/f.cpython-35.pyc" or
        # "__pycache__/f.cpython-35.opt-2.pyc".  Everything after the first
        # dot of the file name is interpreter/optimization tagging.
        name = base.split(".", 1)[0]
        # `leading` ends with "__pycache__" here; dropping it leaves the
        # source directory (with its trailing separator, or "" for a
        # relative "__pycache__/..." path).
        pyfile = op.join(leading[:-len("__pycache__")], name + ".py")
    lgr.debug("Converted pycache file %s to source file %s",
              path, pyfile)
    return pyfile

lgr.log(5, "Done importing reproman.utils")

0 comments on commit 17af512

Please sign in to comment.
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.