-
Notifications
You must be signed in to change notification settings - Fork 186
/
violations_reporter.py
717 lines (621 loc) · 25.4 KB
/
violations_reporter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
"""
Classes for querying the information in a test coverage report.
"""
import itertools
import os
import os.path
import re
from collections import defaultdict
from diff_cover import util
from diff_cover.command_runner import run_command_for_code
from diff_cover.git_path import GitPathTool
from diff_cover.violationsreporters.base import (
BaseViolationReporter,
QualityDriver,
RegexBasedDriver,
Violation,
)
class XmlCoverageReporter(BaseViolationReporter):
"""
Query information from a Cobertura|Clover|JaCoCo XML coverage report.
"""
def __init__(self, xml_roots, src_roots=None):
"""
Load the XML coverage report represented
by the cElementTree with root element `xml_root`.
"""
super().__init__("XML")
self._xml_roots = xml_roots
# Create a dict to cache violations dict results
# Keys are source file paths, values are output of `violations()`
self._info_cache = defaultdict(list)
# Create a list to cache xml classes list results
# Values are output of `self._get_xml_classes()`
self._xml_cache = [{} for i in range(len(xml_roots))]
self._src_roots = src_roots or [""]
def _get_xml_classes(self, xml_document):
"""
Return a dict of classes in `xml_document`.
Keys are `filename`, values are list of `class`
If `class` is not present in `xml_document`,
return empty defaultdict(list)
"""
# cobertura sometimes provides the sources for the measurements
# within it. If we have that we outta use it
sources = xml_document.findall("sources/source")
sources = [source.text for source in sources if source.text]
classes = xml_document.findall(".//class") or []
res = defaultdict(list)
for clazz in classes:
f = clazz.get("filename")
if not f:
continue
res[util.to_unix_path(f)].append(clazz)
for source in sources:
abs_f = util.to_unix_path(os.path.join(source.strip(), f))
res[abs_f].append(clazz)
return res
def _get_classes(self, index, xml_document, src_path):
"""
Given a path and parsed xml_document provides class nodes
with the relevant lines
First, we look to see if xml_document contains a source
node providing paths to search for
If we don't have that we check each nodes filename attribute
matches an absolute path
Finally, if we found no nodes, we check the filename attribute
for the relative path
"""
# Remove git_root from src_path for searching the correct filename
# If cwd is `/home/user/work/diff-cover/diff_cover`
# and src_path is `diff_cover/violations_reporter.py`
# search for `violations_reporter.py`
src_rel_path = util.to_unix_path(GitPathTool.relative_path(src_path))
# If cwd is `/home/user/work/diff-cover/diff_cover`
# and src_path is `other_package/some_file.py`
# search for `/home/user/work/diff-cover/other_package/some_file.py`
src_abs_path = util.to_unix_path(GitPathTool.absolute_path(src_path))
# Create a cache for `classes` in `xml_document` if cache exists
if not self._xml_cache[index]:
self._xml_cache[index] = self._get_xml_classes(xml_document)
return self._xml_cache[index].get(src_abs_path) or self._xml_cache[index].get(
src_rel_path
)
def get_src_path_line_nodes_cobertura(self, index, xml_document, src_path):
classes = self._get_classes(index, xml_document, src_path)
if not classes:
return None
lines = [clazz.findall("./lines/line") for clazz in classes]
return list(itertools.chain(*lines))
@staticmethod
def get_src_path_line_nodes_clover(xml_document, src_path):
"""
Return a list of nodes containing line information for `src_path`
in `xml_document`.
If file is not present in `xml_document`, return None
"""
files = [
file_tree
for file_tree in xml_document.findall(".//file")
if GitPathTool.relative_path(file_tree.get("path")) == src_path
]
if not files:
return None
lines = []
for file_tree in files:
lines.append(file_tree.findall('./line[@type="stmt"]'))
lines.append(file_tree.findall('./line[@type="cond"]'))
return list(itertools.chain(*lines))
def _measured_source_path_matches(self, package_name, file_name, src_path):
# find src_path in any of the source roots
if not src_path.endswith(file_name):
return False
norm_src_path = os.path.normcase(src_path)
for root in self._src_roots:
if (
os.path.normcase(
GitPathTool.relative_path(
os.path.join(root, package_name, file_name)
)
)
== norm_src_path
):
return True
return False
def get_src_path_line_nodes_jacoco(self, xml_document, src_path):
"""
Return a list of nodes containing line information for `src_path`
in `xml_document`.
If file is not present in `xml_document`, return None
"""
files = []
packages = list(xml_document.findall(".//package"))
for pkg in packages:
_files = [
_file
for _file in pkg.findall("sourcefile")
if self._measured_source_path_matches(
pkg.get("name"), _file.get("name"), src_path
)
]
files.extend(_files)
if not files:
return None
lines = [file_tree.findall("./line") for file_tree in files]
return list(itertools.chain(*lines))
def _cache_file(self, src_path):
"""
Load the data from `self._xml_roots`
for `src_path`, if it hasn't been already.
"""
# If we have not yet loaded this source file
if src_path not in self._info_cache:
# We only want to keep violations that show up in each xml source.
# Thus, each time, we take the intersection. However, to do this
# we must treat the first time as a special case and just add all
# the violations from the first xml report.
violations = None
# A line is measured if it is measured in any of the reports, so
# we take set union each time and can just start with the empty set
measured = set()
# Loop through the files that contain the xml roots
for i, xml_document in enumerate(self._xml_roots):
if xml_document.findall(".[@clover]"):
# see etc/schema/clover.xsd at https://bitbucket.org/atlassian/clover/src
line_nodes = self.get_src_path_line_nodes_clover(
xml_document, src_path
)
_number = "num"
_hits = "count"
elif xml_document.findall(".[@name]"):
# https://github.com/jacoco/jacoco/blob/master/org.jacoco.report/src/org/jacoco/report/xml/report.dtd
line_nodes = self.get_src_path_line_nodes_jacoco(
xml_document, src_path
)
_number = "nr"
_hits = "ci"
else:
# https://github.com/cobertura/web/blob/master/htdocs/xml/coverage-04.dtd
line_nodes = self.get_src_path_line_nodes_cobertura(
i, xml_document, src_path
)
_number = "number"
_hits = "hits"
if line_nodes is None:
continue
# First case, need to define violations initially
if violations is None:
violations = {
Violation(int(line.get(_number)), None)
for line in line_nodes
if int(line.get(_hits, 0)) == 0
}
# If we already have a violations set,
# take the intersection of the new
# violations set and its old self
else:
violations = violations & {
Violation(int(line.get(_number)), None)
for line in line_nodes
if int(line.get(_hits, 0)) == 0
}
# Measured is the union of itself and the new measured
measured = measured | {int(line.get(_number)) for line in line_nodes}
# If we don't have any information about the source file,
# don't report any violations
if violations is None:
violations = set()
self._info_cache[src_path] = (violations, measured)
def violations(self, src_path):
"""
See base class comments.
"""
self._cache_file(src_path)
# Yield all lines not covered
return self._info_cache[src_path][0]
def measured_lines(self, src_path):
"""
See base class docstring.
"""
self._cache_file(src_path)
return self._info_cache[src_path][1]
class LcovCoverageReporter(BaseViolationReporter):
"""
Query information from a Cobertura|Clover|JaCoCo XML coverage report.
"""
def __init__(self, lcov_roots, src_roots=None):
"""
Load the lcov.info coverage report represented
"""
super().__init__("LCOV")
self._lcov_roots = lcov_roots
self._lcov_report = defaultdict(list)
# Create a dict to cache violations dict results
# Keys are source file paths, values are output of `violations()`
self._info_cache = defaultdict(list)
self._src_roots = src_roots or [""]
@staticmethod
def parse(lcov_file):
"""
Parse a single LCov coverage report
File format: https://ltp.sourceforge.net/coverage/lcov/geninfo.1.php
"""
lcov_report = defaultdict(dict)
lcov = open(lcov_file)
while True:
line = lcov.readline()
if not line:
break
directive, _, content = line.strip().partition(":")
# we're only interested in file name and line coverage
if directive == "SF":
# SF:<absolute path to the source file>
source_file = util.to_unix_path(GitPathTool.relative_path(content))
continue
elif directive == "DA":
# DA:<line number>,<execution count>[,<checksum>]
args = content.split(",")
if len(args) < 2 or len(args) > 3:
raise ValueError(f"Unknown syntax in lcov report: {line}")
line_no = int(args[0])
num_executions = int(args[1])
if source_file is None:
raise ValueError(
f"No source file specified for line coverage: {line}"
)
if line_no not in lcov_report[source_file]:
lcov_report[source_file][line_no] = 0
lcov_report[source_file][line_no] += num_executions
elif directive in [
"TN",
"FNF",
"FNH",
"FN",
"FNDA",
"LH",
"LF",
"BRF",
"BRH",
"BRDA",
]:
# these are valid lines, but not we don't need them
continue
elif directive == "end_of_record":
source_file = None
else:
raise ValueError(f"Unknown syntax in lcov report: {line}")
lcov.close()
return lcov_report
def _cache_file(self, src_path):
"""
Load the data from `self._lcov_roots`
for `src_path`, if it hasn't been already.
"""
# If we have not yet loaded this source file
if src_path not in self._info_cache:
# We only want to keep violations that show up in each xml source.
# Thus, each time, we take the intersection. However, to do this
# we must treat the first time as a special case and just add all
# the violations from the first xml report.
violations = None
# A line is measured if it is measured in any of the reports, so
# we take set union each time and can just start with the empty set
measured = set()
# Remove git_root from src_path for searching the correct filename
# If cwd is `/home/user/work/diff-cover/diff_cover`
# and src_path is `diff_cover/violations_reporter.py`
# search for `violations_reporter.py`
src_rel_path = util.to_unix_path(GitPathTool.relative_path(src_path))
# If cwd is `/home/user/work/diff-cover/diff_cover`
# and src_path is `other_package/some_file.py`
# search for `/home/user/work/diff-cover/other_package/some_file.py`
src_abs_path = util.to_unix_path(GitPathTool.absolute_path(src_path))
# Loop through the files that contain the xml roots
for lcov_document in self._lcov_roots:
src_search_path = src_abs_path
if src_search_path not in lcov_document:
src_search_path = src_rel_path
# First case, need to define violations initially
if violations is None:
violations = {
Violation(int(line_no), None)
for line_no, num_executions in lcov_document[
src_search_path
].items()
if int(num_executions) == 0
}
# If we already have a violations set,
# take the intersection of the new
# violations set and its old self
else:
violations = violations & {
Violation(int(line_no), None)
for line_no, num_executions in lcov_document[
src_search_path
].items()
if int(num_executions) == 0
}
# Measured is the union of itself and the new measured
# measured = measured | {int(line.get(_number)) for line in line_nodes}
measured = measured | {
int(line_no)
for line_no, num_executions in lcov_document[
src_search_path
].items()
}
# If we don't have any information about the source file,
# don't report any violations
if violations is None:
violations = set()
self._info_cache[src_path] = (violations, measured)
def violations(self, src_path):
"""
See base class comments.
"""
self._cache_file(src_path)
# Yield all lines not covered
return self._info_cache[src_path][0]
def measured_lines(self, src_path):
"""
See base class docstring.
"""
self._cache_file(src_path)
return self._info_cache[src_path][1]
pycodestyle_driver = RegexBasedDriver(
name="pycodestyle",
supported_extensions=["py"],
command=["pycodestyle"],
expression=r"^([^:]+):(\d+).*([EW]\d{3}.*)$",
command_to_check_install=["pycodestyle", "--version"],
# pycodestyle exit code is 1 if there are violations
# http://pycodestyle.pycqa.org/en/latest/intro.html
exit_codes=[0, 1],
)
pyflakes_driver = RegexBasedDriver(
name="pyflakes",
supported_extensions=["py"],
command=["pyflakes"],
# Match lines of the form:
# path/to/file.py:328: undefined name '_thing'
# path/to/file.py:418: 'random' imported but unused
expression=r"^([^:]+):(\d+):\d*:? (.*)$",
command_to_check_install=["pyflakes", "--version"],
# pyflakes exit code is 1 if there are violations
# https://github.com/PyCQA/pyflakes/blob/master/pyflakes/api.py#L211
exit_codes=[0, 1],
)
"""
Report Flake8 violations.
"""
flake8_driver = RegexBasedDriver(
name="flake8",
supported_extensions=["py"],
command=["flake8"],
# Match lines of the form:
# new_file.py:1:17: E231 whitespace
expression=r"^([^:]+):(\d+):(?:\d+): ([a-zA-Z]+\d+.*)$",
command_to_check_install=["flake8", "--version"],
# flake8 exit code is 1 if there are violations
# http://flake8.pycqa.org/en/latest/user/invocation.html
exit_codes=[0, 1],
)
jshint_driver = RegexBasedDriver(
name="jshint",
supported_extensions=["js"],
command=["jshint"],
expression=r"^([^:]+): line (\d+), col \d+, (.*)$",
command_to_check_install=["jshint", "-v"],
)
shellcheck_driver = RegexBasedDriver(
name="shellcheck",
supported_extensions=["sh"],
# Use gcc format to ease violations parsing
command=["shellcheck", "-f", "gcc"],
expression=r"^([^:]+):(\d+):(\d+: .*)$",
command_to_check_install=["shellcheck", "-V"],
# shellcheck exit code is 1 if there are violations
# https://www.shellcheck.net/wiki/Integration#exit-codes
exit_codes=[0, 1],
)
class EslintDriver(RegexBasedDriver):
def __init__(self):
super().__init__(
name="eslint",
supported_extensions=["js"],
command=["eslint", "--format=compact"],
expression=r"^([^:]+): line (\d+), col \d+, (.*)$",
command_to_check_install=["eslint", "-v"],
)
self.report_root_path = None
def add_driver_args(self, **kwargs):
self.report_root_path = kwargs.pop("report_root_path", None)
if kwargs:
super().add_driver_args(**kwargs)
def parse_reports(self, reports):
violations_dict = super().parse_reports(reports)
if self.report_root_path:
keys = list(violations_dict.keys())
for key in keys:
new_key = os.path.relpath(key, self.report_root_path)
violations_dict[new_key] = violations_dict.pop(key)
return violations_dict
"""
Report pydocstyle violations.
Warning/error codes:
D1**: Missing Docstrings
D2**: Whitespace Issues
D3**: Quotes Issues
D4**: Docstring Content Issues
http://www.pydocstyle.org/en/latest/error_codes.html
"""
pydocstyle_driver = RegexBasedDriver(
name="pydocstyle",
supported_extensions=["py"],
command=["pydocstyle"],
expression=r"^(.+?):(\d+).*?$.+?^ (.*?)$",
command_to_check_install=["pydocstyle", "--version"],
flags=re.MULTILINE | re.DOTALL,
# pydocstyle exit code is 1 if there are violations
# http://www.pydocstyle.org/en/2.1.1/usage.html#return-code
exit_codes=[0, 1],
)
class PylintDriver(QualityDriver):
def __init__(self):
"""
args:
expression: regex used to parse report
See super for other args
"""
super().__init__(
"pylint",
["py"],
[
"pylint",
'--msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}"',
],
# Pylint returns bit-encoded exit codes as documented here:
# https://pylint.readthedocs.io/en/latest/user_guide/run.html
# 1 = fatal error, occurs if an error prevents pylint from doing further processing
# 2,4,8,16 = error/warning/refactor/convention message issued
# 32 = usage error
[
0,
2,
4,
2 | 4,
8,
2 | 8,
4 | 8,
2 | 4 | 8,
16,
2 | 16,
4 | 16,
2 | 4 | 16,
8 | 16,
2 | 8 | 16,
4 | 8 | 16,
2 | 4 | 8 | 16,
],
)
self.pylint_expression = re.compile(
r"^([^:]+):(\d+): \[(\w+),? ?([^\]]*)] (.*)$"
)
self.dupe_code_violation = "R0801"
self.command_to_check_install = ["pylint", "--version"]
# Match lines of the form:
# path/to/file.py:123: [C0111] Missing docstring
# path/to/file.py:456: [C0111, Foo.bar] Missing docstring
self.multi_line_violation_regex = re.compile(r"==((?:\w|\.)+?):\[?(\d+)")
self.dupe_code_violation_regex = re.compile(r"Similar lines in (\d+) files")
def _process_dupe_code_violation(self, lines, current_line, message):
"""
The duplicate code violation is a multi line error. This pulls out
all the relevant files
"""
src_paths = []
message_match = self.dupe_code_violation_regex.match(message)
if message_match:
for _ in range(int(message_match.group(1))):
current_line += 1
match = self.multi_line_violation_regex.match(lines[current_line])
src_path, l_number = match.groups()
src_paths.append(("%s.py" % src_path, l_number))
return src_paths
def parse_reports(self, reports):
"""
Args:
reports: list[str] - output from the report
Return:
A dict[Str:Violation]
Violation is a simple named tuple Defined above
"""
violations_dict = defaultdict(list)
for report in reports:
output_lines = report.split("\n")
for output_line_number, line in enumerate(output_lines):
match = self.pylint_expression.match(line)
# Ignore any line that isn't matched
# (for example, snippets from the source code)
if match is not None:
(
pylint_src_path,
line_number,
pylint_code,
function_name,
message,
) = match.groups()
if pylint_code == self.dupe_code_violation:
files_involved = self._process_dupe_code_violation(
output_lines, output_line_number, message
)
else:
files_involved = [(pylint_src_path, line_number)]
for violation in files_involved:
pylint_src_path, line_number = violation
# pylint might uses windows paths
pylint_src_path = util.to_unix_path(pylint_src_path)
# If we're looking for a particular source file,
# ignore any other source files.
if function_name:
error_str = "{}: {}: {}".format(
pylint_code, function_name, message
)
else:
error_str = f"{pylint_code}: {message}"
violation = Violation(int(line_number), error_str)
violations_dict[pylint_src_path].append(violation)
return violations_dict
def installed(self):
"""
Method checks if the provided tool is installed.
Returns: boolean True if installed
"""
return run_command_for_code(self.command_to_check_install) == 0
class CppcheckDriver(QualityDriver):
"""
Driver for cppcheck c/c++ static analyzer.
"""
def __init__(self):
"""
args:
expression: regex used to parse report
See super for other args
"""
super().__init__(
"cppcheck",
["c", "cpp", "h", "hpp"],
["cppcheck", "--quiet"],
output_stderr=True,
)
# Errors look like:
# [src/foo.c:123]: (error) Array 'yolo[4]' accessed at index 4, which is out of bounds.
# Match for everything, including ":" in the file name (first capturing
# group), in case there are pathological path names with ":"
self.cppcheck_expression = re.compile(r"^\[(.*?):(\d+)\]: (.*$)")
self.command_to_check_install = ["cppcheck", "--version"]
def parse_reports(self, reports):
"""
Args:
reports: list[str] - output from the report
Return:
A dict[Str:Violation]
Violation is a simple named tuple Defined above
"""
violations_dict = defaultdict(list)
for report in reports:
output_lines = report.splitlines()
for line in output_lines:
match = self.cppcheck_expression.match(line)
# Ignore any line that isn't matched
# (for example, snippets from the source code)
if match is not None:
(cppcheck_src_path, line_number, message) = match.groups()
violation = Violation(int(line_number), message)
violations_dict[cppcheck_src_path].append(violation)
return violations_dict
def installed(self):
"""
Method checks if the provided tool is installed.
Returns: boolean True if installed
"""
return run_command_for_code(self.command_to_check_install) == 0