From dbb9ccbfa93792618a4c2ea236accee45870552c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20van=20Noord?= <13665637+DanielNoord@users.noreply.github.com> Date: Tue, 27 Jul 2021 21:12:31 +0200 Subject: [PATCH] Add unspecified-encoding checker #3826 This adds an unspecified-encoding checker that adds a warning whenever open() is called without an explicit encoding argument. This closes #3826 --- CONTRIBUTORS.txt | 2 + ChangeLog | 4 ++ doc/whatsnew/2.10.rst | 2 + pylint/checkers/stdlib.py | 45 ++++++++++++++- .../functional/u/unspecified_encoding_py3.py | 55 +++++++++++++++++++ .../functional/u/unspecified_encoding_py3.txt | 15 +++++ 6 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 tests/functional/u/unspecified_encoding_py3.py create mode 100644 tests/functional/u/unspecified_encoding_py3.txt diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index f1d54dbd5c2..c0865d0c6d6 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -521,3 +521,5 @@ contributors: * Yilei Yang: contributor * Marcin Kurczewski (rr-): contributor + +* Daniel van Noord (DanielNoord): contributor diff --git a/ChangeLog b/ChangeLog index 778e863939b..52ce722f3df 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,10 @@ Release date: TBA .. Put new features here and also in 'doc/whatsnew/2.10.rst' +* Added ``unspecified-encoding``: Emitted when open() is called without specifying an encoding + + Closes #3826 + What's New in Pylint 2.9.6? 
=========================== diff --git a/doc/whatsnew/2.10.rst b/doc/whatsnew/2.10.rst index 3e17dc78174..ee717524ada 100644 --- a/doc/whatsnew/2.10.rst +++ b/doc/whatsnew/2.10.rst @@ -12,7 +12,9 @@ Summary -- Release highlights New checkers ============ +* Added ``unspecified-encoding``: Emitted when open() is called without specifying an encoding + Closes #3826 Other Changes ============= diff --git a/pylint/checkers/stdlib.py b/pylint/checkers/stdlib.py index c0e789ad874..0e04da05c93 100644 --- a/pylint/checkers/stdlib.py +++ b/pylint/checkers/stdlib.py @@ -30,6 +30,7 @@ # Copyright (c) 2021 Marc Mueller <30130371+cdce8p@users.noreply.github.com> # Copyright (c) 2021 Matus Valo # Copyright (c) 2021 victor <16359131+jiajunsu@users.noreply.github.com> +# Copyright (c) 2021 Daniel van Noord <13665637+DanielNoord@users.noreply.github.com> # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/PyCQA/pylint/blob/main/LICENSE @@ -44,7 +45,7 @@ from pylint.checkers import BaseChecker, DeprecatedMixin, utils from pylint.interfaces import IAstroidChecker -OPEN_FILES = {"open", "file"} +OPEN_FILES_MODE = {"open", "file"} UNITTEST_CASE = "unittest.case" THREADING_THREAD = "threading.Thread" COPY_COPY = "copy.copy" @@ -425,6 +426,13 @@ class StdlibChecker(DeprecatedMixin, BaseChecker): "deprecated-decorator", "The decorator is marked as deprecated and will be removed in the future.", ), + "W1514": ( + "Using open without explicitly specifying an encoding", + "unspecified-encoding", + "It is better to specify an encoding when opening documents. " + "Using the system default implicitly can create problems on other operating systems. 
" + "See https://www.python.org/dev/peps/pep-0597/", + ), } def __init__(self, linter=None): @@ -485,6 +493,7 @@ def _check_shallow_copy_environ(self, node): "subprocess-popen-preexec-fn", "subprocess-run-check", "deprecated-class", + "unspecified-encoding", ) def visit_call(self, node): """Visit a Call node.""" @@ -494,8 +503,12 @@ def visit_call(self, node): if inferred is astroid.Uninferable: continue if inferred.root().name == OPEN_MODULE: - if getattr(node.func, "name", None) in OPEN_FILES: + if getattr(node.func, "name", None) in OPEN_FILES_MODE: self._check_open_mode(node) + if getattr(node.func, "name", None) in {"open"} or getattr( + node.func, "attrname", None + ) in {"open"}: + self._check_open_encoded(node) elif inferred.root().name == UNITTEST_CASE: self._check_redundant_assert(node, inferred) elif isinstance(inferred, astroid.ClassDef): @@ -573,6 +586,34 @@ def _check_open_mode(self, node): ): self.add_message("bad-open-mode", node=node, args=mode_arg.value) + def _check_open_encoded(self, node: astroid.Call) -> None: + """Check that an open call in text mode explicitly specifies an encoding.""" + mode_arg = None + try: + mode_arg = utils.get_argument_from_call(node, position=1, keyword="mode") + except utils.NoSuchArgumentError: + pass + if mode_arg: + mode_arg = utils.safe_infer(mode_arg) + if not mode_arg or ( + isinstance(mode_arg, astroid.Const) and "b" not in str(mode_arg.value) + ): + encoding_arg = None + try: + # encoding is the fourth positional parameter of open() + encoding_arg = utils.get_argument_from_call( + node, position=3, keyword="encoding" + ) + except utils.NoSuchArgumentError: + self.add_message("unspecified-encoding", node=node) + if encoding_arg: + encoding_arg = utils.safe_infer(encoding_arg) + if ( + isinstance(encoding_arg, astroid.Const) + and encoding_arg.value is None + ): + self.add_message("unspecified-encoding", node=node) + def _check_env_function(self, node, infer): env_name_kwarg = "key" env_value_kwarg = "default" diff --git a/tests/functional/u/unspecified_encoding_py3.py b/tests/functional/u/unspecified_encoding_py3.py new 
file mode 100644 index 00000000000..20d2d7be1cb --- /dev/null +++ b/tests/functional/u/unspecified_encoding_py3.py @@ -0,0 +1,55 @@ +"""Warnings for using open() without specifying an encoding""" +# pylint: disable=consider-using-with +import io +import locale + +FILENAME = "foo.bar" +open(FILENAME, "w", encoding="utf-8") +open(FILENAME, "wb") +open(FILENAME, "w+b") +open(FILENAME) # [unspecified-encoding] +open(FILENAME, "wt") # [unspecified-encoding] +open(FILENAME, "w+") # [unspecified-encoding] +open(FILENAME, "w", encoding=None) # [unspecified-encoding] +open(FILENAME, "r") # [unspecified-encoding] + +with open(FILENAME, encoding="utf8", errors="surrogateescape") as f: + pass + +LOCALE_ENCODING = locale.getlocale()[1] +with open(FILENAME, encoding=LOCALE_ENCODING) as f: + pass + +with open(FILENAME) as f: # [unspecified-encoding] + pass + +with open(FILENAME, encoding=None) as f: # [unspecified-encoding] + pass + +LOCALE_ENCODING = None +with open(FILENAME, encoding=LOCALE_ENCODING) as f: # [unspecified-encoding] + pass + +io.open(FILENAME, "w+b") +io.open_code(FILENAME) +io.open(FILENAME) # [unspecified-encoding] +io.open(FILENAME, "wt") # [unspecified-encoding] +io.open(FILENAME, "w+") # [unspecified-encoding] +io.open(FILENAME, "w", encoding=None) # [unspecified-encoding] + +with io.open(FILENAME, encoding="utf8", errors="surrogateescape") as f: + pass + +LOCALE_ENCODING = locale.getlocale()[1] +with io.open(FILENAME, encoding=LOCALE_ENCODING) as f: + pass + +with io.open(FILENAME) as f: # [unspecified-encoding] + pass + +with io.open(FILENAME, encoding=None) as f: # [unspecified-encoding] + pass + +LOCALE_ENCODING = None +with io.open(FILENAME, encoding=LOCALE_ENCODING) as f: # [unspecified-encoding] + pass diff --git a/tests/functional/u/unspecified_encoding_py3.txt b/tests/functional/u/unspecified_encoding_py3.txt new file mode 100644 index 00000000000..cde7f223831 --- /dev/null +++ b/tests/functional/u/unspecified_encoding_py3.txt @@ -0,0 +1,15 @@ 
+unspecified-encoding:10:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:11:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:12:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:13:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:14:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:23:5::"Using open without explicitly specifying an encoding" +unspecified-encoding:26:5::"Using open without explicitly specifying an encoding" +unspecified-encoding:30:5::"Using open without explicitly specifying an encoding" +unspecified-encoding:35:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:36:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:37:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:38:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:47:5::"Using open without explicitly specifying an encoding" +unspecified-encoding:50:5::"Using open without explicitly specifying an encoding" +unspecified-encoding:54:5::"Using open without explicitly specifying an encoding"