Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UnicodeError Parsing TOML #961

Closed
adam-grant-hendry opened this issue Oct 23, 2022 · 2 comments
Closed

UnicodeError Parsing TOML #961

adam-grant-hendry opened this issue Oct 23, 2022 · 2 comments
Labels
bug Something isn't working

Comments

@adam-grant-hendry
Copy link

adam-grant-hendry commented Oct 23, 2022

Describe the bug

NB: Using branch main (specifically, version 1.7.5.dev60) instead of version 1.7.4 resolves this issue. Please read below, including the Additional context section.

Similar to Issue #362, fixed in PR #364, bandit does not decode Unicode on Windows since Windows defaults to cp1252 encoding.

This was first noticed when using commitizen with emojis in a pyproject.toml and running pre-commit. The corresponding repo can be found here:

https://github.com/adam-grant-hendry/qtpygraph

pyproject.toml
[tool.poetry]
name = "qtpygraph"
version = "0.2.0"
description = "A pythonic interface to the Qt Graphics View Framework using qtpy"
authors = ["Hendry, Adam <adam.grant.hendry@gmail.com>"]
license = "Apache-2.0"
readme = "README.rst"
repository = "https://github.com/adam-grant-hendry/qtpygraph"
classifiers = [
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: Microsoft :: Windows :: Windows 10",
    "Operating System :: MacOS :: MacOS X",
    "Operating System :: POSIX :: Linux",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10"
]
packages = [
    {include = "docs"},
    {include = "qtpygraph"},
    {include = "tests"},
    {include = "stubs"}
]

[tool.poetry.dependencies]
add-trailing-comma = ">=2.2.3"
appdirs = ">=1.4.4"
bandit = { extras = ["toml"], version = ">=1.7.4" }
beautifulsoup4 = ">=4.11.1"
black = ">=22.3.0"
blackdoc = ">=0.3.4"
blacken-docs = ">=1.12.1"
check-jsonschema = ">=0.14.3"
codecov = ">=2.1.12"
codespell = ">=2.1.0"
commitizen = { git = "https://github.com/adam-grant-hendry/commitizen.git", branch = "feat/unicode" }
coverage = { extras = ["toml"], version = ">=6.4" }
doc8 = ">=0.11.2"
docformatter = ">=1.4"
flake8 = ">=4.0.1"
flake8-bugbear = ">=22.9.11"
flake8-quotes = ">=3.3.1"
glfw = ">=2.5.3"
graphviz = ">=0.20"
h5py = ">=3.6.0"
html5lib = ">=1.1"
hypothesis = ">=6.46.2"
instaviz = ">=0.6.0"
ipdb = ">=0.13.9"
ipython = ">=8.3.0"
isort = ">=5.10.1"
lxml = ">=4.8.0"
matplotlib = ">=3.5.2"
memory-profiler = ">=0.60.0"
merry = ">=0.3.0"
mypy = ">=0.961"
mypy-extensions = ">=0.4.3"
myst-parser = ">=0.17.2"
ninja = ">=1.10.2"
numpy = ">=1.22.3"
numpydoc = ">=1.4.0"
objgraph = ">=3.5.0"
opencv-python = ">=4.5.5.64"
pep8-naming = ">=0.12.1"
pre-commit = ">=2.18.1"
pycln = ">=1.3.5"
pydantic = ">=1.9.1"
pydocstringformatter = ">=0.7.0"
pydocstyle = { extras = ["toml"], version = ">=6.1.1" }
pyinstaller = ">=5.1"
pylint = ">=2.13.8"
pyright = ">=1.1.253"
pytest = ">=7.1.2"
pytest-cov = ">=3.0.0"
pytest-doctestplus = ">=0.12.0"
pytest-env = ">=0.6.2"
pytest-memprof = ">=0.2.0"
pytest-mock = ">=3.7.0"
pytest-qt = ">=4.0.2"
pytest-randomly = ">=3.11.0"
pytest-xdist = ">=2.5.0"
python = ">=3.8,<3.11"
pyupgrade = ">=2.34.0"
PyYAML = ">=6.0"
QtPy = ">=2.1.0"
requests = ">=2.27.1"
rstcheck = { extras = ["toml", "sphinx" ], version = ">=5.0.0" }
seedir = ">=0.3.0"
setuptools = "^65.3.0"
setuptools-scm = "^7.0.5"
Sphinx = ">=4.5.0"
sphinx-book-theme = "^0.3.3"
sphinxcontrib-email = ">=0.3.5"
sphinxcontrib-mermaid = "^0.7.1"
sphinxcontrib-napoleon = ">=0.7"
toml = ">=0.10.2"
tomli = ">=2.0.1"
tox = ">=3.25.0"
tqdm = ">=4.64.0"
tqdm-stubs = ">=0.2.0"
types-beautifulsoup4 = ">=4.11.1"
types-setuptools = ">=57.4.17"
types-toml = ">=0.10.7"
typing-extensions = ">=4.2.0"
vulture = ">=2.3"
wheel = ">=0.37.1"

[tool.bandit]
skips = [
    "B101",
    "B301",
    "B403",
]

[tool.black]
line-length = 90
skip-string-normalization = true
target-version = ["py38"]
include = '.*\.pyi?$'
exclude = '\.eggs|\.git|\.mypy_cache|\.tox|\.venv|build|dist'

[tool.commitizen]
name = "cz_customize"
version = "0.2.0"
version_files = [
    "pyproject.toml:version",
]
update_changelog_on_bump = true
annotated_tag = true
gpg_sign = true
tag_format = "$version"
style = [
    ["qmark", "fg:#ff9d00 bold"],
    ["question", "italic"],
    ["answer", "fg:#ff9d00 bold"],
    ["pointer", "fg:#ff9d00 bold"],
    ["highlighted", "fg:#ff9d00 bold"],
    ["selected", "fg:#cc5454"],
    ["separator", "fg:#cc5454"],
    ["instruction", ""],
    ["text", ""],
    ["disabled", "fg:#858585 italic"]
]

[tool.commitizen.customize]
message_template = "{{change_type}}({{scope}}){% if is_breaking_change == true %}!{% endif %}: {{subject}}{% if body %}\n\n{{body}}{% endif %}{% if footer %}\n\n{{footer}}{% endif %}"
schema_pattern = '(✨ feat|🐛 fix|♻️ refactor|📚 docs|🚀 release|🤖 ci|🧪 test|⬆️ perf|🗑️ deprecate|🧹 style|❓ other)(\(\S+\))?!?:(\s.*)'
bump_pattern = '^((?:✨ )feat|(?:🐛 )fix|(?:♻️ )refactor|(?:📚 )docs|(?:🚀 )release|(?:🤖 )ci|(?:🧪 )test|(?:⬆️ )perf|(?:🗑️ )deprecate|(?:🧹 )style|(?:❓ )other)(\(.+\))?(!)?'
bump_map = { ".*!" = "MAJOR", "✨ feat" = "MINOR", "🐛 fix" = "PATCH", "♻️ refactor" = "PATCH", "📚 docs" = "PATCH", "🚀 release" = "PATCH", "🤖 ci" = "PATCH", "🧪 test" = "PATCH", "⬆️ perf" = "PATCH", "🗑️ deprecate" = "PATCH", "🧹 style" = "PATCH", "❓ other" = "PATCH" }
change_type_order = ["!", "✨ feat", "🐛 fix", "♻️ refactor", "📚 docs", "🚀 release", "🤖 ci", "🧪 test", "⬆️ perf", "🗑️ deprecate", "🧹 style", "❓ other"]
commit_parser = '^(?P<change_type>(?:✨ )feat|🐛 fix|♻️ refactor|📚 docs|🚀 release|🤖 ci|🧪 test|⬆️ perf|🗑️ deprecate|🧹 style|❓ other)(?:\((?P<scope>[^()\r\n]*)\)|\()?(?P<breaking>!)?:\s(?P<message>.*)?'
changelog_pattern = '^(feat|🐛 fix|♻️ refactor|📚 docs|🚀 release|🤖 ci|🧪 test|⬆️ perf|🗑️ deprecate|🧹 style|❓ other)?(?:[)])(!)?'
change_type_map = { "✨ feat" = "Feat", "🐛 fix" = "Fix", "♻️ refactor" = "Refactor", "📚 docs" = "Docs", "🚀 release" = "Release", "🤖 ci" = "CI", "🧪 test" = "Test", "⬆️ perf" = "Performance", "🗑️ deprecate" = "Deprecate", "🧹 style" = "Style", "❓ other" = "Other"}

[[tool.commitizen.customize.questions]]
name = "change_type"
type = "list"
message = "Select the type of change you are committing"
choices = [
    { value = "✨ feat", name = "✨ feat: (Bumps MINOR) Add/remove an item/feature" },
    { value = "🐛 fix", name = "🐛 fix: (Bumps PATCH) Fix/modify an existing item/feature" },
    { value = "♻️ refactor", name = "♻️  refactor: (Bumps PATCH) Reorganizes item(s); not a 'feat' or 'fix'" },
    { value = "📚 docs", name = "📚 docs: (Bumps X) Does Y" },
    { value = "🚀 release", name = "🚀 release: (Bumps X) Does Y" },
    { value = "🤖 ci", name = "🤖 ci: (Bumps X) Does Y" },
    { value = "🧪 test", name = "🧪 test: (Bumps X) Does Y" },
    { value = "⬆️ perf", name = "⬆️  perf: (Bumps X) Does Y" },
    { value = "🗑️ deprecate", name = "🗑️  deprecate: (Bumps X) Does Y" },
    { value = "🧹 style", name = "🧹 style: (Bumps X) Does Y" },
    { value = "❓ other", name = "❓ other: (Bumps X) Does Y" },
]

[[tool.commitizen.customize.questions]]
name = "scope"
type = "input"
message = "Scope. Enter the scope of the change, category first (docs/test/ci/build/perf), followed by class or file name if applicable (comma-separated, no spaces):\n"

[[tool.commitizen.customize.questions]]
name = "subject"
type = "input"
message = "Subject. Enter the short summary of the change (imperative tone, lowercase, no period):\n"

[[tool.commitizen.customize.questions]]
name = "is_breaking_change"
type = "confirm"
message = "Is this a BREAKING CHANGE (backwards incompatible)? (Bumps MAJOR; default: N):\n"
default = false

[[tool.commitizen.customize.questions]]
name = "body"
type = "input"
message = "Body. Enter complete details about the change (use full sentences with proper grammar): (press [enter] to skip):\n"

[[tool.commitizen.customize.questions]]
name = "footer"
type = "input"
message = "Footer. Reference any Issues this change addresses. If a BREAKING CHANGE, enter details. (press [enter] to skip):\n"

[tool.coverage.run]
branch = true
# Append machine name, process id, and random number to data file name so coverage can be
# run in parallel environments, e.g. in tox.
parallel = true
# `shiboken6`, which creates the python bindings for `Qt` C++ source, imports from a
# `zip` file into the top-level directory at runtime. These files are deleted after
# running, but `coverage` attempts to look at their source after they're gone, causing
# warnings to appear. Namely, it looks for these modules/files:
#
#    project_dir/pysrcript
#    project_dir/shibokensupport
#    project_dir/signature_bootstrap.py
#
# To avoid this error, `source` is specified to the package subdirectory. However, this
# can also be avoided by explicitly omitting these folders in the `omit` section.
#
# See https://github.com/nedbat/coveragepy/issues/1392
source = [
    'qtpygraph/'
]
omit = [
    'qtpygraph/__main__.py',
    '.vscode/',
    '.venv/',
    'tests/',
    'stubs/'
]
disable_warnings = ['no-data-collected']

[tool.coverage.html]
directory = 'logs/coverage/html'

[tool.coverage.json]
output = 'logs/coverage/coverage.json'

[tool.coverage.xml]
output = 'logs/coverage/coverage.xml'

[tool.coverage.report]
exclude_lines = [
    'pragma: no cover',
    'def __repr__',
    'raise AssertionError',
    'raise NotImplementedError',
    'if __name__ == .__main__.:',
    '@(abc\.)?abstractmethod'
]

[tool.coverage.paths]
source = [
    "qtpygraph/",
    "*/site-packages"
]
others = [
    "qtpygraph/",
    "*/qtpygraph",
]

[tool.doc8]
ignore = [
    "D004"  # Ignore CRLF (Not working: see https://github.com/PyCQA/doc8/issues/78)
]
max-line-length = 90

[tool.isort]
profile = "black"
add_imports = [
    "from __future__ import annotations"  # Automatically add to module on save if not there
]
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
ensure_newline_before_comments = true
line_length = 90
skip_glob = [
    # Certain packages require a specific sort order
    # See: https://github.com/pyvista/pyvista/issues/3141
    ".venv/Lib/site-packages/*",
]

[tool.mypy]
python_executable = ".venv/Scripts/python.exe"
python_version = "3.8"
disallow_untyped_defs = true
warn_return_any = true
warn_unused_configs = true
# ``warn_unused_ignores`` seems to be giving several false positives.
# See:
#   - https://github.com/python/mypy/issues/4412
#   - https://github.com/python/mypy/issues/5940
#   - https://github.com/python/mypy/issues/8823
#   - https://github.com/python/mypy/issues/2960
warn_unused_ignores = false
warn_redundant_casts = true
show_error_codes = true
no_pretty = true
show_column_numbers = true
plugins = [
    "pydantic.mypy"
]
exclude = [
    'stubs/',
    '[.]venv/',
    'build/',
    'dist/',
    'ci/',
]
fast_module_lookup = true

[[tool.mypy.overrides]]
module = [
    "stubs.*",
]
ignore_errors = true
follow_imports = "skip"
follow_imports_for_stubs = false
ignore_missing_imports = true

[tool.pycln]
all = true
include = '.*\.pyi?$'

[tool.pydocstyle]
convention = "numpy"
ignore = [
    # Magic methods don't need docstrings
    "D105"
]

[tool.pylint.format]
# Ignore ``line-too-long`` errors for hyperlinks, which must remain on one line
ignore-long-lines = "https?://\\S+?$"

[tool.pylint.main]
# ``pylint`` cannot lint (perform static analysis on) C extension modules since it has
# no way to get an AST object out of the extension module. ``extension-pkg-allow-list``
# is a ``pylint`` mechanism through which you can tell ``pylint`` to import C extension
# modules and build an AST from that imported module. Be aware that using this flag
# means ``pylint`` loads extensions into the active Python interpreter and may run
# arbitrary code, which you may not want and is the reason why ``pylint`` disables it by
# default.
# See:
# - https://pylint.pycqa.org/en/latest/user_guide/messages/error/no-member.html?highlight=%22c%20extension%22#no-member-e1101
# - https://github.com/PyCQA/pylint/issues/1524#issuecomment-1214435049
extension-pkg-allow-list = [
    "PyQt5",
    "PyQt6",
    "PySide2",
    "PySide6",
]
fail-under = 10
ignore = [
    ".venv",
]
load-plugins = [
    "pylint.extensions.bad_builtin",
    "pylint.extensions.broad_try_clause",
    "pylint.extensions.check_elif",
    "pylint.extensions.consider_ternary_expression",
    "pylint.extensions.docparams",
    # I like lines between paragraphs
    # "pylint.extensions.empty_comment",
    "pylint.extensions.redefined_loop_name",
    # Untidy data structures require while
    # See: https://pylint.pycqa.org/en/latest/user_guide/messages/warning/while-used.html
    # See: https://stackoverflow.com/questions/920645/when-to-use-while-or-for-in-python
    # "pylint.extensions.while_used",
    "pylint.extensions.for_any_all",
    "pylint.extensions.no_self_use",
    "pylint.extensions.set_membership",
]

[tool.pylint."messages control"]
confidence = []
disable = [
    "too-few-public-methods",
    "too-many-arguments",
    "too-many-instance-attributes",
    "useless-import-alias",
    "fixme"  # TODO's can be useful
]

[tool.pylint.parameter_documentation]
default-docstring-type = "numpy"

[tool.pyright]
defineConstant = [
  { PYSIDE6 = true },
  { PYQT5 = false },
  { PYSIDE2 = false },
  { PYQT6 = false }
]

[tool.pytest.ini_options]
minversion = "7.0"
# We want different runs each time (don't use seed)
# --randomly-seed=1234
addopts = """\
--last-failed --last-failed-no-failures all \
-p no:faulthandler \
--import-mode=importlib \
--cov \
--cov-report term-missing \
--doctest-rst \
--doctest-modules"""
testpaths = [
    "tests",
]
doctest_plus = "enabled"
env = [
    # See https://github.com/pytest-dev/pytest-qt/issues/437
    # "D:PYTEST_QT_API=pyside6",  # pytest-qt
    # "D:QT_API=pyside6"  # qtpy
    # "D:QT_QPA_PLATFORM=offscreen"
    # "D:COVERAGE_DEBUG=trace",
    # "D:COVERAGE_DEBUG_FILE=debug_log.txt"
]
filterwarnings = [
    # Occurs when mocking QWidgets
    'ignore:pyside_type_init:RuntimeWarning'
]

[tool.rstcheck]
# `rstcheck` is known to be buggy on Windows
# See Issue #107: https://github.com/rstcheck/rstcheck/issues/107
ignore_messages = [
    "(Duplicate label .*, other instance in .*)"
]

[tool.tox]
legacy_tox_ini = """
[tox]
minversion = 3.25.0
envlist = py{38,39,310}-{pyqt5,pyside2,pyqt6,pyside6},coverage
isolated_build = true

[gh-actions]
python =
    3.8: py38-{pyqt5,pyside2,pyqt6,pyside6}
    3.9: py39-{pyqt5,pyside2,pyqt6,pyside6}
    3.10: py310-{pyqt5,pyside2,pyqt6,pyside6}

[testenv]
allowlist_externals =
    poetry
    pytest
setenv =
    # See: https://github.com/tox-dev/tox/issues/1550
    PYTHONIOENCODING=utf-8
    COVERAGE_FILE=tox-.coverage.{envname}
commands =
    poetry install --no-root --sync
    pyqt5: poetry run python -m pip install PyQt5 PyQt5-stubs
    pyside2: poetry run python -m pip install PySide2 PySide2-stubs
    pyqt6: poetry run python -m pip install PyQt6 IceSpringPySideStubs-PyQt6
    pyside6: poetry run python -m pip install PySide6 IceSpringPySideStubs-PySide6
    poetry run pytest

# Not run by default. To run, use `tox -e coverage`
[testenv:coverage]
depends = py{38,39,310}-{pyqt5,pyside2,pyqt6,pyside6}
allowlist_externals =
    sh
deps =
    coverage
    tox
basepython = python3.10
setenv =
    COVERAGE_FILE=tox-.coverage
    COVERAGE_DEBUG=trace
    COVERAGE_DEBUG_FILE=stdout
commands =
    coverage debug data
    coverage combine --debug=pathmap
    coverage report -m --skip-covered
    coverage html
    coverage json
    sh -c 'mv -f tox-.coverage .coverage'
    sh -c 'rm -f tox-.coverage'
parallel_show_output = true

# Not run by default. To run, use `tox -e docs`
[testenv:docs]
allowlist_externals =
    sphinx-build
commands =
    sphinx-build -W --keep-going -b docs docs/build
"""

# Currently, poetry only supports pure python builds. This (undocumented) feature
# tells poetry to build wheels for the Host OS
[tool.poetry.build]
generate-setup-file = false
script = 'build.py'

[build-system]
requires = [
    "setuptools>=45",
    "wheel",
    "toml",
]
build-backend = "setuptools.build_meta"

Reproduction steps

  1. Create and activate a virtual environment:
PS> py -m venv .venv && .venv\Scripts\Activate.ps1
  1. Update the pyproject.toml with emojis as shown (see Describe the bug) and install the project
PS> poetry install
  1. Run pre-commit
PS> pre-commit run bandit

Expected behavior

pre-commit is able to run with bandit hook without UnicodeDecodeErrors

Actual Behavior

bandit emits a UnicodeDecodeError:

(bandit) Check for security vulnerabilities........................................................Failed
- hook id: bandit
- exit code: 1

Traceback (most recent call last):
  File "C:\Program Files\Python38\lib\runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Program Files\Python38\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\hendra11\Code\external\qtpygraph\.venv\Scripts\bandit.EXE\__main__.py", line 7, in <module>
  File "c:\users\hendra11\code\external\qtpygraph\.venv\lib\site-packages\bandit\cli\main.py", line 455, in main
    b_conf = b_config.BanditConfig(config_file=args.config_file)
  File "c:\users\hendra11\code\external\qtpygraph\.venv\lib\site-packages\bandit\core\config.py", line 52, in __init__
    self._config = toml.load(f)["tool"]["bandit"]
  File "c:\users\hendra11\code\external\qtpygraph\.venv\lib\site-packages\toml\decoder.py", line 156, in load
    return loads(f.read(), _dict, decoder)
  File "C:\Program Files\Python38\lib\encodings\cp1252.py", line 23, in decode
    return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 4080: character maps to <undefined>

Bandit version

1.7.4 (Default)

Python version

3.8

Additional context

OS: Windows 10, 21H2
Python: 3.8.10, x64-bit
poetry: 1.2.2

Following the stack trace, bandit==1.7.4 does not specify an encoding when using open:

if config_file:
try:
f = open(config_file)

However, the main branch has been updated to use read binary mode (open does not support encodings when reading in binary mode):

if config_file:
try:
f = open(config_file, "rb")

@adam-grant-hendry adam-grant-hendry added the bug Something isn't working label Oct 23, 2022
@adam-grant-hendry
Copy link
Author

adam-grant-hendry commented Oct 23, 2022

Feel free to close this as "to be resolved in future version", or something similarly appropriate, if applicable. Also feel free to incorporate in CHANGELOG for next revision, if applicable.

@ericwb
Copy link
Member

ericwb commented Jan 14, 2024

Looks like this has been fixed in 1.7.5+.

@ericwb ericwb closed this as completed Jan 14, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants