diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index f1d54dbd5c2..c0865d0c6d6 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -521,3 +521,5 @@ contributors: * Yilei Yang: contributor * Marcin Kurczewski (rr-): contributor + +* Daniel van Noord (DanielNoord): contributor diff --git a/ChangeLog b/ChangeLog index 778e863939b..52ce722f3df 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,10 @@ Release date: TBA .. Put new features here and also in 'doc/whatsnew/2.10.rst' +* Added ``unspecified-encoding``: Emitted when open() is called without specifying an encoding + + Closes #3826 + What's New in Pylint 2.9.6? =========================== diff --git a/doc/whatsnew/2.10.rst b/doc/whatsnew/2.10.rst index 3e17dc78174..ee717524ada 100644 --- a/doc/whatsnew/2.10.rst +++ b/doc/whatsnew/2.10.rst @@ -12,7 +12,9 @@ Summary -- Release highlights New checkers ============ +* Added ``unspecified-encoding``: Emitted when open() is called without specifying an encoding + Closes #3826 Other Changes ============= diff --git a/pylint/checkers/stdlib.py b/pylint/checkers/stdlib.py index c0e789ad874..732a51fa7fe 100644 --- a/pylint/checkers/stdlib.py +++ b/pylint/checkers/stdlib.py @@ -30,6 +30,7 @@ # Copyright (c) 2021 Marc Mueller <30130371+cdce8p@users.noreply.github.com> # Copyright (c) 2021 Matus Valo # Copyright (c) 2021 victor <16359131+jiajunsu@users.noreply.github.com> +# Copyright (c) 2021 Daniel van Noord <13665637+DanielNoord@users.noreply.github.com> # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/PyCQA/pylint/blob/main/LICENSE @@ -425,6 +426,13 @@ class StdlibChecker(DeprecatedMixin, BaseChecker): "deprecated-decorator", "The decorator is marked as deprecated and will be removed in the future.", ), + "W1514": ( + "Using open without explicitly specifying an encoding", + "unspecified-encoding", + "It is better to specify an encoding when opening documents. " + "Using the system default implicitly can create problems on other operating systems. " + "See https://www.python.org/dev/peps/pep-0597/", + ), } def __init__(self, linter=None): @@ -485,6 +493,7 @@ def _check_shallow_copy_environ(self, node): "subprocess-popen-preexec-fn", "subprocess-run-check", "deprecated-class", + "unspecified-encoding", ) def visit_call(self, node): """Visit a Call node.""" @@ -496,6 +505,8 @@ def visit_call(self, node): if inferred.root().name == OPEN_MODULE: if getattr(node.func, "name", None) in OPEN_FILES: self._check_open_mode(node) + if getattr(node.func, "name", None) == "open": + self._check_open_encoded(node) elif inferred.root().name == UNITTEST_CASE: self._check_redundant_assert(node, inferred) elif isinstance(inferred, astroid.ClassDef): @@ -573,6 +584,34 @@ def _check_open_mode(self, node): ): self.add_message("bad-open-mode", node=node, args=mode_arg.value) + def _check_open_encoded(self, node: astroid.Call) -> None: + """Check that the encoded argument of an open call is valid.""" + mode_arg = None + try: + mode_arg = utils.get_argument_from_call(node, position=1, keyword="mode") + except utils.NoSuchArgumentError: + pass + + if mode_arg: + mode_arg = utils.safe_infer(mode_arg) + if not mode_arg or "b" not in mode_arg.value: + encoding_arg = None + try: + encoding_arg = utils.get_argument_from_call( + node, position=None, keyword="encoding" + ) + except utils.NoSuchArgumentError: + self.add_message("unspecified-encoding", node=node) + + if encoding_arg: + encoding_arg = utils.safe_infer(encoding_arg) + + if ( + isinstance(encoding_arg, astroid.Const) + and encoding_arg.value is None + ): + self.add_message("unspecified-encoding", node=node) + def _check_env_function(self, node, infer): env_name_kwarg = "key" env_value_kwarg = "default" diff --git a/tests/functional/u/unspecified_encoding_py3.py b/tests/functional/u/unspecified_encoding_py3.py new file mode 100644 index 00000000000..11ce156269c --- /dev/null +++ b/tests/functional/u/unspecified_encoding_py3.py @@ -0,0 +1,30 @@ +"""Warnings for using open() without specifying an encoding""" +# pylint: disable=consider-using-with +import locale + +FILENAME = "foo.bar" +open(FILENAME, "w", encoding="utf-8") +open(FILENAME, "wb") +open(FILENAME, "w+b") +open(FILENAME) # [unspecified-encoding] +open(FILENAME, "wt") # [unspecified-encoding] +open(FILENAME, "w+") # [unspecified-encoding] +open(FILENAME, "w", encoding=None) # [unspecified-encoding] +open(FILENAME, "r") # [unspecified-encoding] + +with open(FILENAME, encoding="utf8", errors="surrogateescape") as f: + pass + +LOCALE_ENCODING = locale.getlocale()[1] +with open(FILENAME, encoding=LOCALE_ENCODING) as f: + pass + +with open(FILENAME) as f: # [unspecified-encoding] + pass + +with open(FILENAME, encoding=None) as f: # [unspecified-encoding] + pass + +LOCALE_ENCODING = None +with open(FILENAME, encoding=LOCALE_ENCODING) as f: # [unspecified-encoding] + pass diff --git a/tests/functional/u/unspecified_encoding_py3.txt b/tests/functional/u/unspecified_encoding_py3.txt new file mode 100644 index 00000000000..5f0cf89d774 --- /dev/null +++ b/tests/functional/u/unspecified_encoding_py3.txt @@ -0,0 +1,8 @@ +unspecified-encoding:9:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:10:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:11:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:12:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:13:0::"Using open without explicitly specifying an encoding" +unspecified-encoding:22:5::"Using open without explicitly specifying an encoding" +unspecified-encoding:25:5::"Using open without explicitly specifying an encoding" +unspecified-encoding:29:5::"Using open without explicitly specifying an encoding"