Skip to content

Commit

Permalink
Restructure the AST parsing heuristic to always pick the same module
Browse files Browse the repository at this point in the history
When a file contained a misplaced type annotation, we were retrying the parsing
without type comments support. That second parsing was using the builtin ast module,
but the rest of the tree utilities (the builder and rebuilder) were not aware of the
new parsing module that was used to build the AST nodes a second time.

This commit moves the logic for picking the parsing module and the corresponding
AST node mappings into a single place, which can be used by both the builder
and the rebuilder.

Close pylint-dev/pylint#3540
Close #773
  • Loading branch information
PCManticore committed Apr 29, 2020
1 parent b9974d5 commit efb5155
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 103 deletions.
5 changes: 5 additions & 0 deletions ChangeLog
Expand Up @@ -10,6 +10,11 @@ Release Date: TBA

Close #772

* Restructure the AST parsing heuristic to always pick the same module

Close PyCQA/pylint#3540
Close #773


What's New in astroid 2.4.0?
============================
Expand Down
123 changes: 100 additions & 23 deletions astroid/_ast.py
Expand Up @@ -4,10 +4,11 @@
from typing import Optional
import sys

_ast_py2 = _ast_py3 = None
import astroid

_ast_py3 = None
try:
import typed_ast.ast3 as _ast_py3
import typed_ast.ast27 as _ast_py2
except ImportError:
pass

Expand All @@ -21,28 +22,30 @@
FunctionType = namedtuple("FunctionType", ["argtypes", "returns"])


def _get_parser_module(parse_python_two=False, type_comments_support=True):
if not type_comments_support:
return ast

if parse_python_two:
parser_module = _ast_py2
else:
parser_module = _ast_py3
return parser_module or ast


def _parse(string: str, parse_python_two=False, type_comments=True):
parse_module = _get_parser_module(
parse_python_two=parse_python_two, type_comments_support=type_comments
class ParserModule(
namedtuple(
"ParserModule",
[
"module",
"unary_op_classes",
"cmp_op_classes",
"bool_op_classes",
"bin_op_classes",
"context_classes",
],
)
parse_func = parse_module.parse
if parse_module is _ast_py3:
if PY38:
parse_func = partial(parse_func, type_comments=type_comments)
if not parse_python_two:
parse_func = partial(parse_func, feature_version=sys.version_info.minor)
return parse_func(string)
):
def parse(self, string: str, type_comments=True):
if self.module is _ast_py3:
if PY38:
parse_func = partial(self.module.parse, type_comments=type_comments)
else:
parse_func = partial(
self.module.parse, feature_version=sys.version_info.minor
)
else:
parse_func = self.module.parse
return parse_func(string)


def parse_function_type_comment(type_comment: str) -> Optional[FunctionType]:
Expand All @@ -52,3 +55,77 @@ def parse_function_type_comment(type_comment: str) -> Optional[FunctionType]:

func_type = _ast_py3.parse(type_comment, "<type_comment>", "func_type")
return FunctionType(argtypes=func_type.argtypes, returns=func_type.returns)


def get_parser_module(type_comments=True) -> ParserModule:
    """Return a ``ParserModule`` for the AST module that should do the parsing.

    ``_ast_py3`` is used when ``type_comments`` is true and ``_ast_py3`` is
    set; in every other case the ``ast`` module is used.  The result bundles
    the selected module together with the operator and context mappings
    derived from that same module.
    """
    selected = _ast_py3 if type_comments else None
    if not selected:
        # Either type comments were not requested or ``_ast_py3`` is unset.
        selected = ast

    return ParserModule(
        module=selected,
        unary_op_classes=_unary_operators_from_module(selected),
        cmp_op_classes=_compare_operators_from_module(selected),
        bool_op_classes=_bool_operators_from_module(selected),
        bin_op_classes=_binary_operators_from_module(selected),
        context_classes=_contexts_from_module(selected),
    )


def _unary_operators_from_module(module):
    """Map the unary operator classes found on ``module`` to their symbols.

    ``module`` must expose ``UAdd``, ``USub``, ``Not`` and ``Invert``.
    """
    names_and_symbols = (
        ("UAdd", "+"),
        ("USub", "-"),
        ("Not", "not"),
        ("Invert", "~"),
    )
    return {getattr(module, name): symbol for name, symbol in names_and_symbols}


def _binary_operators_from_module(module):
    """Map the binary operator classes found on ``module`` to their symbols.

    Every class named in the table below is looked up as an attribute of
    ``module`` and becomes a key of the returned dict.
    """
    names_and_symbols = (
        ("Add", "+"),
        ("BitAnd", "&"),
        ("BitOr", "|"),
        ("BitXor", "^"),
        ("Div", "/"),
        ("FloorDiv", "//"),
        ("MatMult", "@"),
        ("Mod", "%"),
        ("Mult", "*"),
        ("Pow", "**"),
        ("Sub", "-"),
        ("LShift", "<<"),
        ("RShift", ">>"),
    )
    return {getattr(module, name): symbol for name, symbol in names_and_symbols}


def _bool_operators_from_module(module):
    """Map the ``And`` and ``Or`` classes of ``module`` to their keywords."""
    names_and_symbols = (("And", "and"), ("Or", "or"))
    return {getattr(module, name): symbol for name, symbol in names_and_symbols}


def _compare_operators_from_module(module):
    """Map the comparison operator classes found on ``module`` to their symbols.

    Every class named in the table below is looked up as an attribute of
    ``module`` and becomes a key of the returned dict.
    """
    names_and_symbols = (
        ("Eq", "=="),
        ("Gt", ">"),
        ("GtE", ">="),
        ("In", "in"),
        ("Is", "is"),
        ("IsNot", "is not"),
        ("Lt", "<"),
        ("LtE", "<="),
        ("NotEq", "!="),
        ("NotIn", "not in"),
    )
    return {getattr(module, name): symbol for name, symbol in names_and_symbols}


def _contexts_from_module(module):
    """Map the expression context classes of ``module`` to astroid contexts.

    ``Load``, ``Store`` and ``Del`` map to the astroid attribute of the same
    name; ``Param`` maps to ``astroid.Store``.
    """
    pairs = (
        (module.Load, astroid.Load),
        (module.Store, astroid.Store),
        (module.Del, astroid.Del),
        # ``Param`` shares the ``Store`` context instead of having its own.
        (module.Param, astroid.Store),
    )
    return dict(pairs)
21 changes: 11 additions & 10 deletions astroid/builder.py
Expand Up @@ -22,7 +22,7 @@
import textwrap
from tokenize import detect_encoding

from astroid._ast import _parse
from astroid._ast import get_parser_module
from astroid import bases
from astroid import exceptions
from astroid import manager
Expand All @@ -42,7 +42,7 @@
# The comment used to select a statement to be extracted
# when calling extract_node.
_STATEMENT_SELECTOR = "#@"

MISPLACED_TYPE_ANNOTATION_ERROR = "misplaced type annotation"
MANAGER = manager.AstroidManager()


Expand Down Expand Up @@ -165,7 +165,7 @@ def _post_build(self, module, encoding):
def _data_build(self, data, modname, path):
"""Build tree node from data and add some informations"""
try:
node = _parse_string(data)
node, parser_module = _parse_string(data, type_comments=True)
except (TypeError, ValueError, SyntaxError) as exc:
raise exceptions.AstroidSyntaxError(
"Parsing Python code failed:\n{error}",
Expand All @@ -174,6 +174,7 @@ def _data_build(self, data, modname, path):
path=path,
error=exc,
) from exc

if path is not None:
node_file = os.path.abspath(path)
else:
Expand All @@ -186,7 +187,7 @@ def _data_build(self, data, modname, path):
path is not None
and os.path.splitext(os.path.basename(path))[0] == "__init__"
)
builder = rebuilder.TreeRebuilder(self._manager)
builder = rebuilder.TreeRebuilder(self._manager, parser_module)
module = builder.visit_module(node, modname, node_file, package)
module._import_from_nodes = builder._import_from_nodes
module._delayed_assattr = builder._delayed_assattr
Expand Down Expand Up @@ -438,17 +439,17 @@ def _extract(node):
return extracted


MISPLACED_TYPE_ANNOTATION_ERROR = "misplaced type annotation"


def _parse_string(data, type_comments=True):
parser_module = get_parser_module(type_comments=type_comments)
try:
node = _parse(data + "\n", type_comments=type_comments)
parsed = parser_module.parse(data + "\n", type_comments=type_comments)
except SyntaxError as exc:
# If the type annotations are misplaced for some reason, we do not want
# to fail the entire parsing of the file, so we need to retry the parsing without
# type comment support.
if exc.args[0] != MISPLACED_TYPE_ANNOTATION_ERROR or not type_comments:
raise
node = _parse(data + "\n", type_comments=False)
return node

parser_module = get_parser_module(type_comments=False)
parsed = parser_module.parse(data + "\n", type_comments=False)
return parsed, parser_module

0 comments on commit efb5155

Please sign in to comment.