forked from pantsbuild/pants
/
simple_codegen_task.py
495 lines (395 loc) · 18.7 KB
/
simple_codegen_task.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
# coding=utf-8
# Copyright 2015 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os
from abc import abstractmethod
from builtins import zip
from collections import OrderedDict
from future.utils import text_type
from twitter.common.collections import OrderedSet
from pants.base.build_environment import get_buildroot
from pants.base.deprecated import deprecated_conditional
from pants.base.exceptions import TaskError
from pants.base.workunit import WorkUnitLabel
from pants.build_graph.address import Address
from pants.build_graph.address_lookup_error import AddressLookupError
from pants.engine.fs import PathGlobs, PathGlobsAndRoot
from pants.source.wrapped_globs import EagerFilesetWithSpec, FilesetRelPathWrapper
from pants.task.task import Task
from pants.util.dirutil import fast_relpath, safe_delete, safe_walk
logger = logging.getLogger(__name__)
class SimpleCodegenTask(Task):
    """Base class for tasks that generate code for a single target language.

    :API: public
    """

    # Type of the gen targets this task acts on (e.g. JavaThriftLibrary). Subclasses that leave
    # this as None must implement is_gentarget themselves.
    gentarget_type = None

    # Glob patterns matching the generated sources, relative to the target's workdir. Must be a
    # tuple of strings, e.g. ('**/*.java',). While this is None, the deprecated find_sources is
    # used instead.
    sources_globs = None

    # Tuple of glob patterns to exclude from the sources_globs matches.
    sources_exclude_globs = ()
def __init__(self, context, workdir):
    """Pass-through Task constructor, declared here for public API visibility.

    :API: public
    """
    parent = super(SimpleCodegenTask, self)
    parent.__init__(context, workdir)
@classmethod
def product_types(cls):
    """Return the product types this task claims to produce: java, scala and python."""
    # NB(gmalmquist): Hack carried over from the old CodeGen base class so that the round manager
    # schedules codegen before resolve and compile. Having each codegen class declare the
    # languages it actually generates would be more correct, but would cause problems with scala.
    # See https://rbcommons.com/s/twitter/r/2540/.
    languages = ('java', 'scala', 'python')
    return list(languages)
@classmethod
def register_options(cls, register):
    """Register --allow-empty and --allow-dups in addition to the inherited options."""
    super(SimpleCodegenTask, cls).register_options(register)

    allow_dups_help = ('Allow multiple targets specifying the same sources. If duplicates are '
                       'allowed, the logic of find_sources will associate generated sources with '
                       'the least-dependent targets that generate them.')

    register('--allow-empty', type=bool, default=True, fingerprint=True, advanced=True,
             help='Skip targets with no sources defined.')
    register('--allow-dups', type=bool, fingerprint=True, advanced=True,
             help=allow_dups_help)
@classmethod
def get_fingerprint_strategy(cls):
    """Hook for subclasses that need a non-default fingerprint strategy.

    :API: public

    :return: a fingerprint strategy, or None (the value returned here) to use the default one.
    """
    default_strategy = None
    return default_strategy
@property
def cache_target_dirs(self):
    """Always True for this task.

    NOTE(review): presumably overrides a default declared on Task, which is not visible in
    this file -- confirm what enabling it implies there.
    """
    enabled = True
    return enabled
@property
def validate_sources_present(self):
    """Whether input targets are required to have sources.

    When this is True, the `--allow-empty` flag decides whether missing sources are reported as
    a warning or as an error.

    :API: public
    """
    required = True
    return required
def synthetic_target_extra_dependencies(self, target, target_workdir):
    """Return any extra dependencies the generated synthetic target should have.

    Implementing this is optional for subclasses, since some code generators have no extra
    dependencies; this default returns an empty list.

    :API: public

    :param Target target: the Target a synthetic Target is being generated from. E.g., 'target'
      might be a JavaProtobufLibrary, whose corresponding synthetic Target would be a
      JavaLibrary. Depending on the subclass, this parameter may not be needed.
    :param target_workdir: unused by this default implementation.
    :return: a list of dependencies.
    """
    no_extra_dependencies = []
    return no_extra_dependencies
def synthetic_target_extra_exports(self, target, target_workdir):
    """Return any extra exports the generated synthetic target should have.

    Implementing this is optional for subclasses, since some code generators have no extra
    exports; this default returns an empty list.

    NB: Extra exports must also be present in the extra dependencies.

    :API: public

    :param Target target: the Target a synthetic Target is being generated from. E.g., 'target'
      might be a JavaProtobufLibrary, whose corresponding synthetic Target would be a
      JavaLibrary. Depending on the subclass, this parameter may not be needed.
    :param target_workdir: unused by this default implementation.
    :return: a list of exported targets.
    """
    no_extra_exports = []
    return no_extra_exports
def synthetic_target_type_by_target(self, target):
    """Return the type of target this codegen task generates for `target`.

    For example, the target type for JaxbGen would simply be JavaLibrary. This base
    implementation is a placeholder that always raises.

    :API: public

    :return: a type (class) that inherits from Target.
    :raises NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError()
def synthetic_target_type(self, target):
    """Return the type of target this codegen task generates for `target`.

    For example, the target type for JaxbGen would simply be JavaLibrary. This base
    implementation is a placeholder that always raises.

    :API: public

    :return: a type (class) that inherits from Target.
    :raises NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError()
def is_gentarget(self, target):
    """Tell whether `target` is relevant to this codegen task.

    E.g., the JaxbGen task considers JaxbLibrary targets to be relevant, and nothing else.

    :API: public

    :param Target target: The target to check.
    :return: True if this class can generate code for the given target, False otherwise.
    :raises NotImplementedError: if `gentarget_type` is unset and the subclass did not
      override this method.
    """
    gen_type = self.gentarget_type
    if not gen_type:
        raise NotImplementedError
    return isinstance(target, gen_type)
def ignore_dup(self, tgt1, tgt2, rel_src):
    """Hook letting subclasses exempt a specific generated source file from dup checking.

    This default exempts nothing and always returns False.
    """
    exempt = False
    return exempt
def codegen_targets(self):
    """Find the codegen targets in the dependency graph.

    Asks the task context for its targets, filtered with `is_gentarget`.

    :API: public

    :return: an iterable of dependency targets.
    """
    predicate = self.is_gentarget
    return self.context.targets(predicate)
def _do_validate_sources_present(self, target):
"""Checks whether sources is empty, and either raises a TaskError or just returns False.
The specifics of this behavior are defined by whether the user sets --allow-empty to True/False:
--allow-empty=False will result in a TaskError being raised in the event of an empty source
set. If --allow-empty=True, this method will just return false and log a warning.
Shared for all SimpleCodegenTask subclasses to help keep errors consistent and descriptive.
:param target: Target to validate.
:return: True if sources is not empty, False otherwise.
"""
if not self.validate_sources_present:
return True
sources = target.sources_relative_to_buildroot()
if not sources:
message = ('Target {} has no sources.'.format(target.address.spec))
if not self.get_options().allow_empty:
raise TaskError(message)
else:
logging.warn(message)
return False
return True
def _get_synthetic_address(self, target, target_workdir):
    """Build the Address of the synthetic target generated for `target`.

    The address path is `target_workdir` relative to the buildroot and the address name is the
    target's id.
    """
    name = target.id
    rel_workdir = os.path.relpath(target_workdir, get_buildroot())
    return Address(rel_workdir, name)
def execute(self):
    """Generate code for invalid gen targets and inject a synthetic target for every one.

    Returns immediately when there are no codegen targets. Otherwise, for each versioned target
    (vt) of the invalidation check: invalid ones are regenerated and checked for duplicate
    sources; then sources are captured for every vt that does not have them yet, a synthetic
    target is injected per vt, and the transitive invalidation hashes of all dependees are
    marked dirty.
    """
    gen_targets = self.codegen_targets()
    if not gen_targets:
        return

    with self.invalidated(gen_targets,
                          invalidate_dependents=True,
                          topological_order=True,
                          fingerprint_strategy=self.get_fingerprint_strategy()) as invalidation_check:
        with self.context.new_workunit(name='execute', labels=[WorkUnitLabel.MULTITOOL]):
            # Maps (vt, synthetic_target_dir) -> captured sources; None means "not captured yet".
            captured = OrderedDict()
            for vt in invalidation_check.all_vts:
                key = (vt, self.synthetic_target_dir(vt.target, vt.results_dir))
                captured[key] = None

                if vt.valid:
                    continue

                # Build the target and handle duplicate sources.
                if self._do_validate_sources_present(vt.target):
                    self.execute_codegen(vt.target, vt.results_dir)
                    sources = self._capture_sources((key,))[0]
                    # _handle_duplicate_sources may delete files from the filesystem; when it
                    # does, the entry stays None so the sources get re-captured below.
                    modified = self._handle_duplicate_sources(vt.target, vt.results_dir, sources)
                    if not modified:
                        captured[key] = sources
                # NOTE(review): nesting reconstructed from flattened source -- update() is taken
                # to run for every invalid vt, whether or not codegen ran. Confirm.
                vt.update()

            # Capture, in one batch, everything that is still missing.
            pending = tuple(key for key, sources in captured.items() if sources is None)
            for key, fileset in zip(pending, self._capture_sources(pending)):
                captured[key] = fileset

            for (vt, synthetic_target_dir), fileset in captured.items():
                self._inject_synthetic_target(vt.target, synthetic_target_dir, fileset)

            self._mark_transitive_invalidation_hashes_dirty(
                vt.target.address for vt in invalidation_check.all_vts
            )
def _mark_transitive_invalidation_hashes_dirty(self, addresses):
self.context.build_graph.walk_transitive_dependee_graph(
addresses,
work=lambda t: t.mark_transitive_invalidation_hash_dirty(),
)
@property
def _copy_target_attributes(self):
"""Return a list of attributes to be copied from the target to derived synthetic targets.
By default, propagates the provides, scope, and tags attributes.
"""
return ['provides', 'tags', 'scope']
def synthetic_target_dir(self, target, target_workdir):
    """Return the directory of the synthetic target; by default, `target_workdir` itself.

    :API: public
    """
    directory = target_workdir
    return directory
# Accepts a tuple of (target, synthetic_target_dir) tuples. The first element may also be a
# versioned target wrapping the target, which is what execute() passes.
# Returns a tuple of EagerFilesetWithSpecs in matching order.
def _capture_sources(self, targets_and_dirs):
    """Snapshot the generated sources under each synthetic target directory.

    For every entry, the globs to capture are `sources_globs` when set, or otherwise the files
    reported by the deprecated `find_sources`; `sources_exclude_globs` are always excluded. All
    globs are made relative to the buildroot and captured in a single scheduler call.

    :param targets_and_dirs: tuple of (target or versioned target, synthetic_target_dir).
    :return: tuple of EagerFilesetWithSpec, one per entry, in the same order.
    """
    buildroot = get_buildroot()
    to_capture = []
    results_dirs = []
    filespecs = []
    for target_or_vt, synthetic_target_dir in targets_and_dirs:
        if self.sources_globs is None:
            # execute() keys its entries by versioned target, so unwrap it: find_sources is
            # documented to receive the Target itself. Plain targets pass through untouched.
            # NOTE(review): assumes Target objects expose no `target` attribute -- confirm.
            target = getattr(target_or_vt, 'target', target_or_vt)
            files = list(self.find_sources(target, synthetic_target_dir))
        else:
            files = self.sources_globs

        results_dir_relpath = os.path.relpath(synthetic_target_dir, buildroot)
        buildroot_relative_globs = tuple(
            os.path.join(results_dir_relpath, pattern) for pattern in files
        )
        buildroot_relative_excludes = tuple(
            os.path.join(results_dir_relpath, pattern) for pattern in self.sources_exclude_globs
        )
        to_capture.append(
            PathGlobsAndRoot(
                PathGlobs(buildroot_relative_globs, buildroot_relative_excludes),
                text_type(buildroot),
            )
        )
        results_dirs.append(results_dir_relpath)
        filespecs.append(FilesetRelPathWrapper.to_filespec(buildroot_relative_globs))

    snapshots = self.context._scheduler.capture_snapshots(tuple(to_capture))
    return tuple(
        EagerFilesetWithSpec(results_dir_relpath, filespec, snapshot)
        for results_dir_relpath, filespec, snapshot in zip(results_dirs, filespecs, snapshots)
    )
def _inject_synthetic_target(
    self,
    target,
    target_workdir,
    sources,
):
    """Create, inject, and return a synthetic target for the given target and workdir.

    The synthetic target receives the attributes named by `_copy_target_attributes`, the extra
    dependencies (and, where the synthetic type supports them, exports) supplied by the
    subclass hooks, and `target`'s own dependencies. Every dependent of `target` is made to
    depend on the synthetic target too, and the synthetic target joins the target roots when
    `target` is one of them.

    :param target: The target to inject a synthetic target for.
    :param target_workdir: The work directory containing the generated code for the target.
    :param sources: The sources handed to the synthetic target.
    :return: The newly created synthetic target.
    :raises MismatchedExtraExports: if an extra export is not also an extra dependency.
    """
    synthetic_target_type = self.synthetic_target_type(target)
    synthetic_extra_dependencies = self.synthetic_target_extra_dependencies(target, target_workdir)

    copied_attributes = {name: getattr(target, name) for name in self._copy_target_attributes}

    if self._supports_exports(synthetic_target_type):
        extra_exports = self.synthetic_target_extra_exports(target, target_workdir)

        missing_dependencies = set(extra_exports) - set(synthetic_extra_dependencies)
        if missing_dependencies:
            raise self.MismatchedExtraExports(
                'Extra synthetic exports included targets not in the extra dependencies: {}. Affected target: {}'
                .format(missing_dependencies, target))

        export_specs = {export.address.spec for export in extra_exports}
        export_specs.update(self._original_export_specs(target))
        copied_attributes['exports'] = sorted(export_specs)

    synthetic_target = self.context.add_new_target(
        address=self._get_synthetic_address(target, target_workdir),
        target_type=synthetic_target_type,
        dependencies=synthetic_extra_dependencies,
        sources=sources,
        derived_from=target,
        **copied_attributes
    )

    graph = self.context.build_graph
    synthetic_address = synthetic_target.address

    # NB(pl): The convenience function (Target.inject_dependency) is bypassed on purpose, for
    # performance: the transitive dependee subgraph can be walked once for transitive
    # invalidation, rather than walking a smaller subgraph for every injected dependency.
    for dependent_address in graph.dependents_of(target.address):
        graph.inject_dependency(dependent=dependent_address, dependency=synthetic_address)

    # NB(pl): Same reasoning as above.
    for dependency_address in graph.dependencies_of(target.address):
        graph.inject_dependency(dependent=synthetic_address, dependency=dependency_address)

    roots = self.context.target_roots
    if target in roots:
        roots.append(synthetic_target)

    return synthetic_target
def _supports_exports(self, target_type):
return hasattr(target_type, 'export_specs')
def _original_export_specs(self, target):
return [t.spec for t in target.export_addresses]
def resolve_deps(self, unresolved_deps):
    """Resolve each dependency through the task context and collect the results in order.

    :API: public

    :param unresolved_deps: iterable of dependencies to pass to `self.context.resolve`.
    :return: an OrderedSet of everything the resolves produced.
    :raises AddressLookupError: if a dependency cannot be resolved; the message names it.
    """
    resolved = OrderedSet()
    for unresolved in unresolved_deps:
        try:
            resolved.update(self.context.resolve(unresolved))
        except AddressLookupError as error:
            # Re-raise with the offending dependency appended to the original message.
            detail = '{message}\n on dependency {dep}'.format(message=error, dep=unresolved)
            raise AddressLookupError(detail)
    return resolved
@abstractmethod
def execute_codegen(self, target, target_workdir):
    """Generate code for the given target; subclasses supply the implementation.

    :param target: A target to generate code for
    :param target_workdir: A clean directory into which to generate code
    """
def find_sources(self, target, target_workdir):
    """Determine, after the fact, which sources were generated for the target.

    Deprecated in favour of `sources_globs` / `sources_exclude_globs`; a deprecation notice
    naming the concrete class is issued on every call. The sources are found by searching the
    directory the target's code was generated into.

    :param Target target: the target for which to find generated sources (unused here).
    :param path target_workdir: directory containing sources for the target.
    :return: A set of filepaths relative to the target_workdir.
    :rtype: OrderedSet
    """
    hint = (
        'SimpleCodegenTask.find_sources is deprecated. Subclasses should instead specify '
        'sources_globs and sources_exclude_globs. '
        'Class to update: {}. find_sources'.format(self.__class__.__name__)
    )
    deprecated_conditional(lambda: True, '1.10.0.dev0', hint)

    found = self._find_sources_in_workdir(target_workdir)
    return OrderedSet(found)
def _find_sources_in_workdir(self, target_workdir):
    """Yield the path, relative to `target_workdir`, of every file found by walking it."""
    for dirpath, _, filenames in safe_walk(target_workdir):
        relative_dir = fast_relpath(dirpath, target_workdir)
        for filename in filenames:
            yield os.path.join(relative_dir, filename)
def _handle_duplicate_sources(self, target, target_workdir, sources):
"""Handles duplicate sources generated by the given gen target by either failure or deletion.
This method should be called after all dependencies have been injected into the graph, but
before injecting the synthetic version of this target.
Returns a boolean indicating whether it modified the underlying filesystem.
NB(gm): Some code generators may re-generate code that their dependent libraries generate.
This results in targets claiming to generate sources that they really don't, so we try to
filter out sources that were actually generated by dependencies of the target. This causes
the code generated by the dependencies to 'win' over the code generated by dependees. By
default, this behavior is disabled, and duplication in generated sources will raise a
TaskError. This is controlled by the --allow-dups flag.
"""
# Walk dependency gentargets and record any sources owned by those targets that are also
# owned by this target.
duplicates_by_target = OrderedDict()
def record_duplicates(dep):
if dep == target or not self.is_gentarget(dep.concrete_derived_from):
return False
duped_sources = [s for s in dep.sources_relative_to_source_root() if s in sources.files and
not self.ignore_dup(target, dep, s)]
if duped_sources:
duplicates_by_target[dep] = duped_sources
target.walk(record_duplicates)
# If there were no dupes, we're done.
if not duplicates_by_target:
return False
# If there were duplicates warn or error.
messages = ['{target} generated sources that had already been generated by dependencies.'
.format(target=target.address.spec)]
for dep, duped_sources in duplicates_by_target.items():
messages.append('\t{} also generated:'.format(dep.concrete_derived_from.address.spec))
messages.extend(['\t\t{}'.format(source) for source in duped_sources])
message = '\n'.join(messages)
if self.get_options().allow_dups:
logger.warn(message)
else:
raise self.DuplicateSourceError(message)
did_modify = False
# Finally, remove duplicates from the workdir. This prevents us from having to worry
# about them during future incremental compiles.
for dep, duped_sources in duplicates_by_target.items():
for duped_source in duped_sources:
safe_delete(os.path.join(target_workdir, duped_source))
did_modify = True
return did_modify
class DuplicateSourceError(TaskError):
    """Raised when a target generated code that one of its dependencies also generated.

    Only raised when --allow-dups=False.
    """
class MismatchedExtraExports(Exception):
    """Raised when an extra export has no matching explicit extra dependency on the same target.

    NB: Exports without accompanying dependencies are caught during compile anyway; raising
    this error lets problems caused by injected exports surface earlier.
    """