From 52d1b459cafa3ec742d7f3aab82bb9b29c61954e Mon Sep 17 00:00:00 2001 From: bachmann Date: Sat, 17 Jan 2015 13:36:20 -0500 Subject: [PATCH] Parse pylint report such that duplicate code violations will flag all involved files, not just one --- .../tests/fixtures/git_diff_code_dupe.txt | 21 ++++ diff_cover/tests/fixtures/pylint_dupe.txt | 102 ++++++++++++++++++ .../pylint_dupe_violations_report.txt | 12 +++ diff_cover/tests/test_integration.py | 7 ++ diff_cover/tests/test_violations_reporter.py | 2 +- diff_cover/violations_reporter.py | 62 ++++++++--- 6 files changed, 192 insertions(+), 14 deletions(-) create mode 100644 diff_cover/tests/fixtures/git_diff_code_dupe.txt create mode 100644 diff_cover/tests/fixtures/pylint_dupe.txt create mode 100644 diff_cover/tests/fixtures/pylint_dupe_violations_report.txt diff --git a/diff_cover/tests/fixtures/git_diff_code_dupe.txt b/diff_cover/tests/fixtures/git_diff_code_dupe.txt new file mode 100644 index 00000000..605fdc85 --- /dev/null +++ b/diff_cover/tests/fixtures/git_diff_code_dupe.txt @@ -0,0 +1,21 @@ +diff --git a/fileone.py b/fileone.py +index e69de29..3952627 100644 +--- a/fileone.py ++++ b/fileone.py +@@ -0,0 +1,16 @@ ++""" ++Fileone ++""" ++def selection_sort(to_sort): ++ """ ++ The greatest sorting algorithm? ++ """ ++ new_list = [] ++ final_size = len(to_sort) ++ while len(new_list) < final_size: ++ candidate_index = 0 ++ for index in xrange(len(to_sort)): ++ if to_sort[index] <= to_sort[candidate_index]: ++ candidate_index = index ++ new_list.append(to_sort.pop(candidate_index)) ++ return new_list \ No newline at end of file diff --git a/diff_cover/tests/fixtures/pylint_dupe.txt b/diff_cover/tests/fixtures/pylint_dupe.txt new file mode 100644 index 00000000..9b15f458 --- /dev/null +++ b/diff_cover/tests/fixtures/pylint_dupe.txt @@ -0,0 +1,102 @@ +************* Module filetwo +filetwo.py:1: [R0801(duplicate-code), ] Similar lines in 2 files +==fileone:3 +==filetwo:3 +def selection_sort(to_sort): + """ + The greatest sorting algorithm? + """ + new_list = [] + final_size = len(to_sort) + while len(new_list) < final_size: + candidate_index = 0 + for index in xrange(len(to_sort)): + if to_sort[index] <= to_sort[candidate_index]: + candidate_index = index + new_list.append(to_sort.pop(candidate_index)) + return new_list + + +Report +====== +21 statements analysed. 
+ +Statistics by type +------------------ + ++---------+-------+-----------+-----------+------------+---------+ +|type |number |old number |difference |%documented |%badname | ++=========+=======+===========+===========+============+=========+ +|module |2 |2 |= |100.00 |0.00 | ++---------+-------+-----------+-----------+------------+---------+ +|class |0 |0 |= |0 |0 | ++---------+-------+-----------+-----------+------------+---------+ +|method |0 |0 |= |0 |0 | ++---------+-------+-----------+-----------+------------+---------+ +|function |2 |2 |= |100.00 |0.00 | ++---------+-------+-----------+-----------+------------+---------+ + + + +Raw metrics +----------- + ++----------+-------+------+---------+-----------+ +|type |number |% |previous |difference | ++==========+=======+======+=========+===========+ +|code |20 |52.63 |20 |= | ++----------+-------+------+---------+-----------+ +|docstring |12 |31.58 |12 |= | ++----------+-------+------+---------+-----------+ +|comment |0 |0.00 |0 |= | ++----------+-------+------+---------+-----------+ +|empty |6 |15.79 |6 |= | ++----------+-------+------+---------+-----------+ + + + +Duplication +----------- + ++-------------------------+-------+---------+-----------+ +| |now |previous |difference | ++=========================+=======+=========+===========+ +|nb duplicated lines |13 |13 |= | ++-------------------------+-------+---------+-----------+ +|percent duplicated lines |40.625 |40.625 |= | ++-------------------------+-------+---------+-----------+ + + + +Messages by category +-------------------- + ++-----------+-------+---------+-----------+ +|type |number |previous |difference | ++===========+=======+=========+===========+ +|convention |0 |0 |= | ++-----------+-------+---------+-----------+ +|refactor |1 |1 |= | ++-----------+-------+---------+-----------+ +|warning |0 |0 |= | ++-----------+-------+---------+-----------+ +|error |0 |0 |= | ++-----------+-------+---------+-----------+ + + + +Messages +-------- + ++---------------+------------+ +|message id |occurrences | ++===============+============+ +|duplicate-code |1 | ++---------------+------------+ + + + +Global evaluation +----------------- +Your code has been rated at 9.52/10 (previous run: 9.52/10, +0.00) + diff --git a/diff_cover/tests/fixtures/pylint_dupe_violations_report.txt b/diff_cover/tests/fixtures/pylint_dupe_violations_report.txt new file mode 100644 index 00000000..a06b554e --- /dev/null +++ b/diff_cover/tests/fixtures/pylint_dupe_violations_report.txt @@ -0,0 +1,12 @@ +------------- +Diff Quality +Quality Report: pylint +Diff: origin/master...HEAD, staged, and unstaged changes +------------- +fileone.py (93.8%): + 3: R0801: (duplicate-code), : Similar lines in 2 files +------------- +Total: 16 lines +Violations: 1 line +% Quality: 93% +------------- \ No newline at end of file diff --git a/diff_cover/tests/test_integration.py b/diff_cover/tests/test_integration.py index 26566055..55c2f9cf 100644 --- a/diff_cover/tests/test_integration.py +++ b/diff_cover/tests/test_integration.py @@ -449,6 +449,13 @@ def test_pre_generated_pylint_report(self): ['diff-quality', '--violations=pylint', 'pylint_report.txt'] ) + def test_pylint_report_with_dup_code_violation(self): + self._check_console_report( + 'git_diff_code_dupe.txt', + 'pylint_dupe_violations_report.txt', + ['diff-quality', '--violations=pylint', 'pylint_dupe.txt'] + ) + def _call_quality_expecting_error(self, tool_name, expected_error): """ Makes calls to diff_quality that should fail to ensure diff --git 
a/diff_cover/tests/test_violations_reporter.py b/diff_cover/tests/test_violations_reporter.py index 59ff3378..6e63ef4a 100644 --- a/diff_cover/tests/test_violations_reporter.py +++ b/diff_cover/tests/test_violations_reporter.py @@ -736,7 +736,7 @@ def test_quality(self): file1.py:149: [C0324, Foo.__dict__] Comma not followed by a space self.peer_grading._find_corresponding_module_for_location(Location('i4x','a','b','c','d')) file1.py:162: [R0801] Similar lines in 2 files - ==external_auth.views:1 + ==file1:162 ==student.views:4 import json import logging diff --git a/diff_cover/violations_reporter.py b/diff_cover/violations_reporter.py index 0663815b..e0f45573 100644 --- a/diff_cover/violations_reporter.py +++ b/diff_cover/violations_reporter.py @@ -398,14 +398,14 @@ class PyflakesQualityReporter(BaseQualityReporter): class Flake8QualityReporter(BaseQualityReporter): """ Report Flake8 violations. - + Flake8 warning/error codes: E***/W***: pep8 errors and warnings F***: pyflakes codes C9**: mccabe complexity plugin N8**: pep8-naming plugin T000: flake8-todo plugin - + http://flake8.readthedocs.org/en/latest/warnings.html """ COMMAND = 'flake8' @@ -422,11 +422,14 @@ class PylintQualityReporter(BaseQualityReporter): LEGACY_OPTIONS = ['-f', 'parseable', '--reports=no', '--include-ids=y'] OPTIONS = MODERN_OPTIONS EXTENSIONS = ['py'] + DUPE_CODE_VIOLATION = 'R0801' # Match lines of the form: # path/to/file.py:123: [C0111] Missing docstring # path/to/file.py:456: [C0111, Foo.bar] Missing docstring VIOLATION_REGEX = re.compile(r'^([^:]+):(\d+): \[(\w+),? ?([^\]]*)] (.*)$') + MULTI_LINE_VIOLATION_REGEX = re.compile(r'==(\w|.+):(.*)') + DUPE_CODE_MESSAGE_REGEX = re.compile(r'Similar lines in (\d+) files') def _run_command(self, src_path): try: @@ -439,32 +442,65 @@ def _run_command(self, src_path): else: raise + def _process_dupe_code_violation(self, lines, current_line, message): + """ + The duplicate code violation is a multi line error. This pulls out + all the relevant files + """ + src_paths = [] + message_match = self.DUPE_CODE_MESSAGE_REGEX.match(message) + if message_match: + for _ in range(int(message_match.group(1))): + current_line += 1 + match = self.MULTI_LINE_VIOLATION_REGEX.match( + lines[current_line] + ) + src_path, l_number = match.groups() + src_paths.append(('%s.py' % src_path, l_number)) + return src_paths + def _parse_output(self, output, src_path=None): """ See base class docstring. """ violations_dict = defaultdict(list) - for line in output.split('\n'): + output_lines = output.split('\n') + + for output_line_number, line in enumerate(output_lines): match = self.VIOLATION_REGEX.match(line) # Ignore any line that isn't matched # (for example, snippets from the source code) if match is not None: - pylint_src_path, line_number, pylint_code, function_name, message = match.groups() + (pylint_src_path, + line_number, + pylint_code, + function_name, + message) = match.groups() + if pylint_code == self.DUPE_CODE_VIOLATION: + files_involved = self._process_dupe_code_violation( + output_lines, + output_line_number, + message + ) + else: + files_involved = [(pylint_src_path, line_number)] - # If we're looking for a particular source file, - # ignore any other source files. - if src_path is None or src_path == pylint_src_path: + for violation in files_involved: + pylint_src_path, line_number = violation + # If we're looking for a particular source file, + # ignore any other source files. 
+ if src_path is None or src_path == pylint_src_path: - if function_name: - error_str = u"{0}: {1}: {2}".format(pylint_code, function_name, message) - else: - error_str = u"{0}: {1}".format(pylint_code, message) + if function_name: + error_str = u"{0}: {1}: {2}".format(pylint_code, function_name, message) + else: + error_str = u"{0}: {1}".format(pylint_code, message) - violation = Violation(int(line_number), error_str) - violations_dict[pylint_src_path].append(violation) + violation = Violation(int(line_number), error_str) + violations_dict[pylint_src_path].append(violation) return violations_dict
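
Note: for illustration only, a minimal standalone sketch (Python) of the parsing behaviour this patch adds. It reuses the regexes and the R0801 constant from the PylintQualityReporter changes above and runs them over an abridged copy of the pylint_dupe.txt fixture; the '%s.py' mapping mirrors the assumption made in _process_dupe_code_violation that pylint reports plain module names that map back to .py files.

    import re

    # Regexes and constant copied from the PylintQualityReporter changes above.
    VIOLATION_REGEX = re.compile(r'^([^:]+):(\d+): \[(\w+),? ?([^\]]*)] (.*)$')
    MULTI_LINE_VIOLATION_REGEX = re.compile(r'==(\w|.+):(.*)')
    DUPE_CODE_MESSAGE_REGEX = re.compile(r'Similar lines in (\d+) files')
    DUPE_CODE_VIOLATION = 'R0801'

    # Abridged from the pylint_dupe.txt fixture: the R0801 header plus the
    # "==module:line" rows that name every file involved in the duplication.
    SAMPLE_OUTPUT = (
        "filetwo.py:1: [R0801(duplicate-code), ] Similar lines in 2 files\n"
        "==fileone:3\n"
        "==filetwo:3\n"
        "def selection_sort(to_sort):"
    )

    lines = SAMPLE_OUTPUT.split('\n')
    for index, line in enumerate(lines):
        match = VIOLATION_REGEX.match(line)
        if match is None:
            continue  # source snippets are ignored, just as in _parse_output
        path, line_number, code, _function_name, message = match.groups()
        if code == DUPE_CODE_VIOLATION:
            # Multi-line violation: read one "==module:line" row per involved
            # file, as _process_dupe_code_violation does.
            count = int(DUPE_CODE_MESSAGE_REGEX.match(message).group(1))
            involved = []
            for offset in range(1, count + 1):
                module, dupe_line = MULTI_LINE_VIOLATION_REGEX.match(
                    lines[index + offset]
                ).groups()
                involved.append(('%s.py' % module, dupe_line))
        else:
            involved = [(path, line_number)]
        print(involved)
        # prints: [('fileone.py', '3'), ('filetwo.py', '3')]

Running the sketch prints one (path, line) pair per involved module, which is why the expected report fixture (pylint_dupe_violations_report.txt) attributes the violation to fileone.py at line 3 even though pylint itself only reported filetwo.py:1.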