Skip to content

Commit

Permalink
Avoid doing a regex search for line lengths unless necessary
Browse files Browse the repository at this point in the history
Before this change, we would check a line for pylint: disable-style
pragmas to determine whether to check the line length. The regex
search itself is very costly (~5% of pylint's total runtime in one
codebase), and is much more costly than the line length check itself.

This refactors the pylint check to instead do an approximate line
length check on every line first, and only then use the regex to handle
the exceptional, false-positive cases, such as a pragma being the cause
of the line length overflow.

This change, in one sample codebase, lowered the check_lines
runtime from 5% of the total runtime to 0.35% of the total runtime.
  • Loading branch information
rtpg authored and Pierre-Sassoulas committed Nov 16, 2020
1 parent e9b4f05 commit 9a5e1b3
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 15 deletions.
2 changes: 2 additions & 0 deletions CONTRIBUTORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -427,3 +427,5 @@ contributors:
* Takashi Hirashima: contributor

* Joffrey Mander: contributor

* Raphael Gaschignard: contributor
3 changes: 3 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ Release date: TBA

* Fix minor documentation issues

* Improve the performance of the line length check.


What's New in Pylint 2.6.0?
===========================

Expand Down
26 changes: 11 additions & 15 deletions pylint/checkers/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,9 +442,7 @@ def _check_keyword_parentheses(self, tokens: List[TokenInfo], start: int) -> Non

def _prepare_token_dispatcher(self):
dispatch = {}
for tokens, handler in [
(_KEYWORD_TOKENS, self._check_keyword_parentheses),
]:
for tokens, handler in [(_KEYWORD_TOKENS, self._check_keyword_parentheses)]:
for token in tokens:
dispatch[token] = handler
return dispatch
Expand Down Expand Up @@ -735,18 +733,20 @@ def check_lines(self, lines: str, lineno: int) -> None:

max_chars = self.config.max_line_length

potential_line_length_warning = False
split_lines = self.specific_splitlines(lines)

for offset, line in enumerate(split_lines):
self.check_line_ending(line, lineno + offset)

# hold onto the initial lineno for later
starting_lineno = lineno
for line in self.specific_splitlines(lines):
self.check_line_ending(line, lineno)
potential_line_length_warning = False
for offset, line in enumerate(split_lines):
# this check is purposefully simple and doesn't rstrip
# since this is running on every line you're checking it's
# advantageous to avoid doing a lot of work
if len(line) > max_chars:
potential_line_length_warning = True
lineno += 1
break

# if there were no lines passing the max_chars config, we don't bother
# running the full line check (as we've met an even more strict condition)
Expand All @@ -762,13 +762,9 @@ def check_lines(self, lines: str, lineno: int) -> None:
# The 'pylint: disable whatever' should not be taken into account for line length count
lines = self.remove_pylint_option_from_lines(mobj)

# reset the lineno back to its original value
# (since we iterated over stuff earlier)
lineno = starting_lineno

for line in self.specific_splitlines(lines):
self.check_line_length(line, lineno)
lineno += 1
# here we re-run specific_splitlines since we have filtered out pylint options above
for offset, line in enumerate(self.specific_splitlines(lines)):
self.check_line_length(line, lineno + offset)

def check_indent_level(self, string, expected, line_num):
"""return the indent level of the string"""
Expand Down

0 comments on commit 9a5e1b3

Please sign in to comment.