Skip to content
Permalink
Browse files

cache python tools in ~/.cache/pants (pantsbuild#7236)

### Problem

This runs for (on my laptop) about 16 seconds every time I do a `clean-all`:
```
22:27:23 00:02   [native-compile]
22:27:23 00:02     [conan-prep]
22:27:23 00:02       [create-conan-pex]
22:27:39 00:18     [conan-fetch]
```

It doesn't seem like we need to be putting this tool in the task workdir as the python requirements list is pretty static. Conan in particular will be instantiated by invoking almost every goal, and it is a nontrivial piece of software to resolve each time.

Also, we aren't mixing in interpreter identity to the generated pex filename, which is a bug that has so far gone undetected: see pantsbuild#7236 (comment).

### Solution

- Take the `stable_json_sha1()` of the requirements of each python tool generated by `PythonToolPrepBase` to generate a fingerprinted pex filename.
- Stick it in the pants cachedir so it doesn't get blown away by a clean-all.
- Add an `--interpreter-constraints` option to pex tools (where previously the repo's `--python-setup-interpreter-constraints` were implicitly used).
- Ensure the selected interpreter identity is mixed into the fingerprinted filename.
- Add a test for the pex filename fingerprinting and that the pex can be successfully executed for python 2 and 3 constraints.

### Result

A significant amount of time spent waiting after clean builds is removed, and pex tools can have their own interpreter constraints as necessary.
  • Loading branch information...
cosmicexplorer committed Feb 22, 2019
1 parent 4097052 commit 8069653f78aefeff2bf5699770722867d3cd3541
@@ -20,6 +20,7 @@ class Conan(PythonToolBase):
'pylint==1.9.3',
]
default_entry_point = 'conans.conan'
default_interpreter_constraints = ['CPython>=2.7,<4']

@classmethod
def register_options(cls, register):
@@ -13,17 +13,26 @@ class PythonToolBase(Subsystem):
# Subclasses must set.
default_requirements = None
default_entry_point = None
# Subclasses need not override.
default_interpreter_constraints = []

@classmethod
def register_options(cls, register):
super(PythonToolBase, cls).register_options(register)
register('--interpreter-constraints', type=list, advanced=True, fingerprint=True,
default=cls.default_interpreter_constraints,
help='Python interpreter constraints for this tool. An empty list uses the default '
'interpreter constraints for the repo.')
register('--requirements', type=list, advanced=True, fingerprint=True,
default=cls.default_requirements,
help='Python requirement strings for the tool.')
register('--entry-point', type=str, advanced=True, fingerprint=True,
default=cls.default_entry_point,
help='The main module for the tool.')

def get_interpreter_constraints(self):
return self.get_options().interpreter_constraints

def get_requirement_specs(self):
return self.get_options().requirements

@@ -5,6 +5,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import os
from builtins import str
from contextlib import contextmanager

from pex.pex import PEX
@@ -13,6 +14,8 @@
from pants.backend.python.interpreter_cache import PythonInterpreterCache
from pants.backend.python.python_requirement import PythonRequirement
from pants.backend.python.subsystems.pex_build_util import PexBuilderWrapper
from pants.base.build_environment import get_pants_cachedir
from pants.base.hash_utils import stable_json_sha1
from pants.base.workunit import WorkUnitLabel
from pants.task.task import Task
from pants.util.dirutil import safe_concurrent_creation
@@ -23,11 +26,16 @@
class PythonToolInstance(object):
def __init__(self, pex_path, interpreter):
self._pex = PEX(pex_path, interpreter=interpreter)
self._interpreter = interpreter

@property
def pex(self):
return self._pex

@property
def interpreter(self):
return self._interpreter

def _pretty_cmdline(self, args):
return safe_shlex_join(self._pex.cmdline(args))

@@ -63,6 +71,12 @@ def run(self, *args, **kwargs):
return cmdline, exit_code


# TODO: This python tool setup ends up eagerly generating each pex for each task in every goal which
# is transitively required by the command-line goals, even for tasks which no-op. This requires each
# pex for each relevant python tool to be buildable on the current host, even if it may never be
# intended to be invoked. Especially given the existing clear separation of concerns into
# PythonToolBase/PythonToolInstance/PythonToolPrepBase, this seems like an extremely ripe use case
# for some v2 rules for free caching and no-op when not required for the command-line goals.
class PythonToolPrepBase(Task):
"""Base class for tasks that resolve a python tool to be invoked out-of-process."""

@@ -97,16 +111,30 @@ def _build_tool_pex(self, tool_subsystem, interpreter, pex_path):
pex_builder.set_entry_point(tool_subsystem.get_entry_point())
pex_builder.freeze()

def _generate_fingerprinted_pex_path(self, tool_subsystem, interpreter):
# `tool_subsystem.get_requirement_specs()` is a list, but order shouldn't actually matter. This
# should probably be sorted, but it's possible a user could intentionally tweak order to work
# around a particular requirement resolution resolve-order issue. In practice the lists are
# expected to be mostly static, so we accept the risk of too-fine-grained caching creating lots
# of pexes in the cache dir.
specs_fingerprint = stable_json_sha1(tool_subsystem.get_requirement_specs())
return os.path.join(
get_pants_cachedir(),
'python',
str(interpreter.identity),
self.fingerprint,
'{}-{}.pex'.format(tool_subsystem.options_scope, specs_fingerprint),
)

def execute(self):
tool_subsystem = self.tool_subsystem_cls.scoped_instance(self)
pex_name = tool_subsystem.options_scope
pex_path = os.path.join(self.workdir, self.fingerprint, '{}.pex'.format(pex_name))

interpreter_cache = PythonInterpreterCache.global_instance()
interpreter = interpreter_cache.select_interpreter_for_targets([])
interpreter = min(interpreter_cache.setup(filters=tool_subsystem.get_interpreter_constraints()))

pex_path = self._generate_fingerprinted_pex_path(tool_subsystem, interpreter)
if not os.path.exists(pex_path):
with self.context.new_workunit(name='create-{}-pex'.format(pex_name),
with self.context.new_workunit(name='create-{}-pex'.format(tool_subsystem.options_scope),
labels=[WorkUnitLabel.PREP]):
self._build_tool_pex(tool_subsystem=tool_subsystem,
interpreter=interpreter,
@@ -0,0 +1,87 @@
# coding=utf-8
# Copyright 2019 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import, division, print_function, unicode_literals

import os
import re

from pants.backend.python.subsystems.python_tool_base import PythonToolBase
from pants.backend.python.tasks.python_tool_prep_base import PythonToolInstance, PythonToolPrepBase
from pants.task.task import Task
from pants.util.contextutil import temporary_dir
from pants_test.backend.python.tasks.python_task_test_base import PythonTaskTestBase


class Tool(PythonToolBase):
options_scope = 'test-tool'
default_requirements = [
'pex==1.5.3',
]
default_entry_point = 'pex.bin.pex:main'


class ToolInstance(PythonToolInstance):
pass


class ToolPrep(PythonToolPrepBase):
options_scope = 'tool-prep-task'
tool_subsystem_cls = Tool
tool_instance_cls = ToolInstance


class ToolTask(Task):
options_scope = 'tool-task'

@classmethod
def prepare(cls, options, round_manager):
super(ToolTask, cls).prepare(options, round_manager)
round_manager.require_data(ToolPrep.tool_instance_cls)

def execute(self):
tool_for_pex = self.context.products.get_data(ToolPrep.tool_instance_cls)
stdout, _, exit_code, _ = tool_for_pex.output(['--version'])
assert re.match(r'.*\.pex 1.5.3', stdout)
assert 0 == exit_code


class PythonToolPrepTest(PythonTaskTestBase):

@classmethod
def task_type(cls):
return ToolTask

def _assert_tool_execution_for_python_version(self, use_py3=True):
scope_string = '3' if use_py3 else '2'
constraint_string = 'CPython>=3' if use_py3 else 'CPython<3'
tool_prep_type = self.synthesize_task_subtype(ToolPrep, 'tp_scope_py{}'.format(scope_string))
with temporary_dir() as tmp_dir:
context = self.context(for_task_types=[tool_prep_type], for_subsystems=[Tool], options={
'': {
'pants_bootstrapdir': tmp_dir,
},
'test-tool': {
'interpreter_constraints': [constraint_string],
},
})
tool_prep_task = tool_prep_type(context, os.path.join(
self.pants_workdir, 'tp_py{}'.format(scope_string)))
tool_prep_task.execute()
# Check that the tool can be created and executed successfully.
self.create_task(context).execute()
pex_tool = context.products.get_data(ToolPrep.tool_instance_cls)
# Check that our pex tool wrapper was constructed with the expected interpreter.
self.assertTrue(pex_tool.interpreter.identity.matches(constraint_string))
return pex_tool

def test_tool_execution(self):
"""Test that python tools are fingerprinted by python interpreter."""
py3_pex_tool = self._assert_tool_execution_for_python_version(use_py3=True)
py3_pex_tool_path = py3_pex_tool.pex.path()
self.assertTrue(os.path.isdir(py3_pex_tool_path))
py2_pex_tool = self._assert_tool_execution_for_python_version(use_py3=False)
py2_pex_tool_path = py2_pex_tool.pex.path()
self.assertTrue(os.path.isdir(py2_pex_tool_path))
self.assertNotEqual(py3_pex_tool_path, py2_pex_tool_path)

0 comments on commit 8069653

Please sign in to comment.
You can’t perform that action at this time.