Skip to content

Commit

Permalink
fix: use UTF-8 encoding when opening files (#704)
Browse files Browse the repository at this point in the history
### Summary of Changes

Explicitly use UTF-8 encoding when opening files. This fixes errors when
opening a CSV or JSON file like

```
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 6: character maps to <undefined>
```

if the file contained non-ASCII tokens.
  • Loading branch information
lars-reimann committed May 3, 2024
1 parent 69736a3 commit f8c27bc
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def from_csv_file(path: str | Path) -> Table:
if path.suffix != ".csv":
raise WrongFileExtensionError(path, ".csv")
if path.exists():
with path.open() as f:
with path.open(encoding="utf-8") as f:
if f.read().replace("\n", "") == "":
return Table()

Expand Down Expand Up @@ -200,7 +200,7 @@ def from_json_file(path: str | Path) -> Table:
if path.suffix != ".json":
raise WrongFileExtensionError(path, ".json")
if path.exists():
with path.open() as f:
with path.open(encoding="utf-8") as f:
if f.read().replace("\n", "") in ("", "{}"):
return Table()

Expand Down
2 changes: 1 addition & 1 deletion tests/resources/table.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
A,B
1,2
❔,2
2 changes: 1 addition & 1 deletion tests/resources/table.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[
{
"A": 1,
"A": "❔",
"B": 2
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
@pytest.mark.parametrize(
("path", "expected"),
[
("table.csv", Table({"A": [1], "B": [2]})),
(Path("table.csv"), Table({"A": [1], "B": [2]})),
("table.csv", Table({"A": ["❔"], "B": [2]})),
(Path("table.csv"), Table({"A": ["❔"], "B": [2]})),
("emptytable.csv", Table()),
],
ids=["by String", "by path", "empty"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
@pytest.mark.parametrize(
("path", "expected"),
[
("table.json", Table({"A": [1], "B": [2]})),
(Path("table.json"), Table({"A": [1], "B": [2]})),
("table.json", Table({"A": ["❔"], "B": [2]})),
(Path("table.json"), Table({"A": ["❔"], "B": [2]})),
(Path("emptytable.json"), Table()),
],
ids=["by string", "by path", "empty"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
[
(
"table.csv",
TimeSeries({"A": [1], "B": [2]}, time_name="A", target_name="B"),
TimeSeries({"A": ["❔"], "B": [2]}, time_name="A", target_name="B"),
),
(Path("table.csv"), TimeSeries({"A": [1], "B": [2]}, time_name="A", target_name="B")),
(Path("table.csv"), TimeSeries({"A": ["❔"], "B": [2]}, time_name="A", target_name="B")),
],
ids=["by String", "by path"],
)
Expand Down

0 comments on commit f8c27bc

Please sign in to comment.