Skip to content

Commit

Permalink
Similar check: Pass min_lines to recombined.
Browse files Browse the repository at this point in the history
  • Loading branch information
JulienPalard authored and Pierre-Sassoulas committed Jul 28, 2021
1 parent 107f59e commit c00b07b
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 2 deletions.
3 changes: 2 additions & 1 deletion ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ Release date: TBA

Closes #3826

* Improved the Similarity checker performance.
* Improved the Similarity checker performance. Fix issue with ``--min-similarity-lines`` used with ``--jobs``.

Closes #4120
Closes #4118

* Don't emit ``no-member`` error if guarded behind if statement.

Expand Down
2 changes: 1 addition & 1 deletion pylint/lint/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def check_parallel(linter, jobs, files, arguments=None):
pool.join()

_merge_mapreduce_data(linter, all_mapreduce_data)
linter.stats = _merge_stats(all_stats)
linter.stats = _merge_stats([linter.stats] + all_stats)

# Insert stats data to local checkers.
for checker in linter.get_checkers():
Expand Down
142 changes: 142 additions & 0 deletions tests/test_check_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,96 @@ def process_module(self, _astroid):
self.data.append(record)


class ParallelTestChecker(BaseChecker):
    """A checker that does need to consolidate data.

    To simulate the need to consolidate data, this checker only
    reports a message for pairs of files.
    On non-parallel builds: it works on all the files in a single run.
    On parallel builds: lint.parallel calls ``open`` once per file.
    So if files are treated by separate processes, no messages will be
    raised from the individual process, all messages will be raised
    from reduce_map_data.
    """

    __implements__ = (pylint.interfaces.IRawChecker,)

    name = "parallel-checker"
    # Prefix used to build per-file records in process_module().
    test_data = "parallel"
    msgs = {
        "R9999": (
            "Test %s",
            "parallel-test-check",
            "Some helpful text.",
        )
    }

    def __init__(self, linter, *args, **kwargs):
        """Forward construction to BaseChecker and reset per-run state."""
        super().__init__(linter, *args, **kwargs)
        self.data = []
        self.linter = linter
        self.stats = None

    def open(self):
        """init the checkers: reset statistics information"""
        self.stats = self.linter.add_stats()
        self.data = []

    def close(self):
        """Emit one message per PAIR of files seen since open().

        With an odd file count the trailing unpaired file produces nothing;
        see the class docstring.
        """
        for _ in self.data[1::2]:  # Work on pairs of files, see class docstring.
            self.add_message("R9999", args=("From process_module, two files seen.",))

    def get_map_data(self):
        """Return this process's accumulated records for the reduce step."""
        return self.data

    def reduce_map_data(self, linter, data):
        """Consolidate the map data from all worker processes.

        ``data`` is a list of per-worker record lists; they are flattened
        and one message is emitted per pair of records.
        """
        recombined = type(self)(linter)
        recombined.open()
        aggregated = []
        for d in data:
            aggregated.extend(d)
        for _ in aggregated[1::2]:  # Work on pairs of files, see class docstring.
            # NOTE(review): messages are emitted via ``self`` while the
            # ``recombined`` checker is only opened/closed — presumably the
            # open/close pair is what matters for stats; confirm intent.
            self.add_message("R9999", args=("From reduce_map_data",))
        recombined.close()

    def process_module(self, _astroid):
        """Called once per stream/file/astroid object"""
        # record the number of invocations with the data object
        record = self.test_data + str(len(self.data))
        self.data.append(record)


class ExtraSequentialTestChecker(SequentialTestChecker):
    """Second sequential checker, registered under its own name.

    Behaves exactly like its base: no data consolidation is needed
    across run invocations.
    """

    test_data = "extra-sequential"
    name = "extra-sequential-checker"


class ExtraParallelTestChecker(ParallelTestChecker):
    """Second parallel checker, registered under its own name.

    Behaves exactly like its base: it DOES need to consolidate data
    across run invocations.
    """

    test_data = "extra-parallel"
    name = "extra-parallel-checker"


class ThirdSequentialTestChecker(SequentialTestChecker):
    """Third sequential checker, registered under its own name.

    Behaves exactly like its base: no data consolidation is needed
    across run invocations.
    """

    test_data = "third-sequential"
    name = "third-sequential-checker"


class ThirdParallelTestChecker(ParallelTestChecker):
    """Third parallel checker, registered under its own name.

    Behaves exactly like its base: it DOES need to consolidate data
    across run invocations.
    """

    test_data = "third-parallel"
    name = "third-parallel-checker"


class TestCheckParallelFramework:
"""Tests the check_parallel() function's framework"""

Expand Down Expand Up @@ -402,3 +478,69 @@ def test_compare_workers_to_single_proc(self, num_files, num_jobs, num_checkers)
assert (
stats_check_parallel == expected_stats
), "The lint is returning unexpected results, has something changed?"

@pytest.mark.parametrize(
"num_files,num_jobs,num_checkers",
[
(2, 2, 1),
(2, 2, 2),
(2, 2, 3),
(3, 2, 1),
(3, 2, 2),
(3, 2, 3),
(3, 1, 1),
(3, 1, 2),
(3, 1, 3),
(3, 5, 1),
(3, 5, 2),
(3, 5, 3),
(10, 2, 1),
(10, 2, 2),
(10, 2, 3),
(2, 10, 1),
(2, 10, 2),
(2, 10, 3),
],
)
def test_map_reduce(self, num_files, num_jobs, num_checkers):
"""Compares the 3 key parameters for check_parallel() produces the same results
The intent here is to validate the reduce step: no stats should be lost.
Checks regression of https://github.com/PyCQA/pylint/issues/4118
"""

# define the stats we expect to get back from the runs, these should only vary
# with the number of files.
file_infos = _gen_file_datas(num_files)

# Loop for single-proc and mult-proc so we can ensure the same linter-config
for do_single_proc in range(2):
linter = PyLinter(reporter=Reporter())

# Assign between 1 and 3 checkers to the linter, they should not change the
# results of the lint
linter.register_checker(ParallelTestChecker(linter))
if num_checkers > 1:
linter.register_checker(ExtraParallelTestChecker(linter))
if num_checkers > 2:
linter.register_checker(ThirdParallelTestChecker(linter))

if do_single_proc:
# establish the baseline
assert (
linter.config.jobs == 1
), "jobs>1 are ignored when calling _check_files"
linter._check_files(linter.get_ast, file_infos)
stats_single_proc = linter.stats
else:
check_parallel(
linter,
jobs=num_jobs,
files=file_infos,
arguments=None,
)
stats_check_parallel = linter.stats
assert (
stats_single_proc["by_msg"] == stats_check_parallel["by_msg"]
), "Single-proc and check_parallel() should return the same thing"

0 comments on commit c00b07b

Please sign in to comment.