diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index a18739f4b26bff..5213120b33f06f 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1790,6 +1790,7 @@ I/O
 - Bug in :meth:`DataFrame.to_dict` when the resulting dict contains non-Python scalars in the case of numeric data (:issue:`23753`)
 - :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`)
 - Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`)
+- Bug in :func:`read_csv` that caused the C engine on Python 3.6+ on Windows to improperly read CSV filenames with accented or special characters (:issue:`15086`)
 - Bug in :func:`read_fwf` in which the compression type of a file was not being properly inferred (:issue:`22199`)
 - Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`)
 - Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where a exception would leave a partially written and invalid dta file (:issue:`23573`)
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index e0fcf102701f42..6cb6ed749f87b1 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -677,7 +677,13 @@ cdef class TextReader:
 
         if isinstance(source, basestring):
             if not isinstance(source, bytes):
-                source = source.encode(sys.getfilesystemencoding() or 'utf-8')
+                if compat.PY36 and compat.is_platform_windows():
+                    # gh-15086: handle accented/special chars in filenames.
+                    encoding = "mbcs"
+                else:
+                    encoding = sys.getfilesystemencoding() or "utf-8"
+
+                source = source.encode(encoding)
 
             if self.memory_map:
                 ptr = new_mmap(source)
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index 6860452f5ccc4e..d87ef7cd15a648 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -1904,6 +1904,18 @@ def test_suppress_error_output(all_parsers, capsys):
     assert captured.err == ""
 
 
+def test_filename_with_special_chars(all_parsers):
+    # see gh-15086: round-trip a CSV whose filename has accented characters.
+    parser = all_parsers
+    df = DataFrame({"a": [1, 2, 3]})
+
+    with tm.ensure_clean("sé-es-vé.csv") as path:
+        df.to_csv(path, index=False)
+
+        result = parser.read_csv(path)
+        tm.assert_frame_equal(result, df)
+
+
 def test_read_table_deprecated(all_parsers):
     # see gh-21948
     parser = all_parsers