Skip to content

Commit

Permalink
feat: check that methods of table can handle an empty table (#314)
Browse files Browse the repository at this point in the history
Closes #123.

### Summary of Changes
Every test for _table.py now ensures that empty tables can be handled.
In some cases, the Table class has been adjusted, e.g. __eq__ method.

Co-authored-by: jxnior01 <129027012+jxnior01@users.noreply.github.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Alexander <47296670+Marsmaennchen221@users.noreply.github.com>
  • Loading branch information
4 people committed Jun 6, 2023
1 parent afb98be commit 686c2e7
Show file tree
Hide file tree
Showing 47 changed files with 514 additions and 135 deletions.
114 changes: 97 additions & 17 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,14 @@ def from_csv_file(path: str | Path) -> Table:
path = Path(path)
if path.suffix != ".csv":
raise WrongFileExtensionError(path, ".csv")
try:
if path.exists():
with path.open() as f:
if f.read().replace("\n", "") == "":
return Table()

return Table._from_pandas_dataframe(pd.read_csv(path))
except FileNotFoundError as exception:
raise FileNotFoundError(f'File "{path}" does not exist') from exception
else:
raise FileNotFoundError(f'File "{path}" does not exist')

@staticmethod
def from_excel_file(path: str | Path) -> Table:
Expand Down Expand Up @@ -164,10 +168,14 @@ def from_json_file(path: str | Path) -> Table:
path = Path(path)
if path.suffix != ".json":
raise WrongFileExtensionError(path, ".json")
try:
if path.exists():
with path.open() as f:
if f.read().replace("\n", "") in ("", "{}"):
return Table()

return Table._from_pandas_dataframe(pd.read_json(path))
except FileNotFoundError as exception:
raise FileNotFoundError(f'File "{path}" does not exist') from exception
else:
raise FileNotFoundError(f'File "{path}" does not exist')

@staticmethod
def from_dict(data: dict[str, list[Any]]) -> Table:
Expand Down Expand Up @@ -351,6 +359,8 @@ def __eq__(self, other: Any) -> bool:
return self.column_names == other.column_names
table1 = self.sort_columns()
table2 = other.sort_columns()
if table1.number_of_rows == 0 and table2.number_of_rows == 0:
return table1.column_names == table2.column_names
return table1._schema == table2._schema and table1._data.equals(table2._data)

def __repr__(self) -> str:
Expand Down Expand Up @@ -528,6 +538,44 @@ def summary(self) -> Table:
result : Table
The table with statistics.
"""
if self.number_of_columns == 0:
return Table(
{
"metrics": [
"maximum",
"minimum",
"mean",
"mode",
"median",
"sum",
"variance",
"standard deviation",
"idness",
"stability",
],
},
)
elif self.number_of_rows == 0:
table = Table(
{
"metrics": [
"maximum",
"minimum",
"mean",
"mode",
"median",
"sum",
"variance",
"standard deviation",
"idness",
"stability",
],
},
)
for name in self.column_names:
table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-", "-", "-", "-", "-"]))
return table

columns = self.to_columns()
result = pd.DataFrame()
statistics = {}
Expand Down Expand Up @@ -587,7 +635,7 @@ def add_column(self, column: Column) -> Table:
if self.has_column(column.name):
raise DuplicateColumnNameError(column.name)

if column._data.size != self.number_of_rows:
if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0:
raise ColumnSizeError(str(self.number_of_rows), str(column._data.size))

result = self._data.copy()
Expand Down Expand Up @@ -626,7 +674,7 @@ def add_columns(self, columns: list[Column] | Table) -> Table:
if column.name in result.columns:
raise DuplicateColumnNameError(column.name)

if column._data.size != self.number_of_rows:
if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0:
raise ColumnSizeError(str(self.number_of_rows), str(column._data.size))

result[column.name] = column._data
Expand All @@ -637,6 +685,7 @@ def add_row(self, row: Row) -> Table:
Add a row to the table.
This table is not modified.
If the table happens to be empty beforehand, respective features will be added automatically.
Parameters
----------
Expand All @@ -653,12 +702,27 @@ def add_row(self, row: Row) -> Table:
SchemaMismatchError
If the schema of the row does not match the table schema.
"""
if self._schema != row.schema:
int_columns = []
result = self.remove_columns([]) # clone
if result.number_of_rows == 0:
int_columns = list(filter(lambda name: isinstance(row[name], int | np.int64), row.column_names))
if result.number_of_columns == 0:
for column in row.column_names:
result._data[column] = Column(column, [])
result._schema = Schema._from_pandas_dataframe(result._data)
elif result.column_names != row.column_names:
raise SchemaMismatchError
elif result._schema != row.schema:
raise SchemaMismatchError

new_df = pd.concat([self._data, row._data]).infer_objects()
new_df.columns = self.column_names
return Table._from_pandas_dataframe(new_df)
new_df = pd.concat([result._data, row._data]).infer_objects()
new_df.columns = result.column_names
result = Table._from_pandas_dataframe(new_df)

for column in int_columns:
result = result.replace_column(column, result.get_column(column).transform(lambda it: int(it)))

return result

def add_rows(self, rows: list[Row] | Table) -> Table:
"""
Expand All @@ -683,16 +747,30 @@ def add_rows(self, rows: list[Row] | Table) -> Table:
"""
if isinstance(rows, Table):
rows = rows.to_rows()
result = self._data
int_columns = []
result = self.remove_columns([]) # clone
for row in rows:
if self._schema != row.schema:
if result.number_of_rows == 0:
int_columns = list(filter(lambda name: isinstance(row[name], int | np.int64), row.column_names))
if result.number_of_columns == 0:
for column in row.column_names:
result._data[column] = Column(column, [])
result._schema = Schema._from_pandas_dataframe(result._data)
elif result.column_names != row.column_names:
raise SchemaMismatchError
elif result._schema != row.schema:
raise SchemaMismatchError

row_frames = (row._data for row in rows)

result = pd.concat([result, *row_frames]).infer_objects()
result.columns = self.column_names
return Table._from_pandas_dataframe(result)
new_df = pd.concat([result._data, *row_frames]).infer_objects()
new_df.columns = result.column_names
result = Table._from_pandas_dataframe(new_df)

for column in int_columns:
result = result.replace_column(column, result.get_column(column).transform(lambda it: int(it)))

return result

def filter_rows(self, query: Callable[[Row], bool]) -> Table:
"""
Expand Down Expand Up @@ -1118,6 +1196,8 @@ def split(self, percentage_in_first: float) -> tuple[Table, Table]:
"""
if percentage_in_first < 0 or percentage_in_first > 1:
raise ValueError("The given percentage is not between 0 and 1")
if self.number_of_rows == 0:
return Table(), Table()
return (
self.slice_rows(0, round(percentage_in_first * self.number_of_rows)),
self.slice_rows(round(percentage_in_first * self.number_of_rows)),
Expand Down
Binary file added tests/resources/empty_excel_file.xlsx
Binary file not shown.
Empty file added tests/resources/emptytable.csv
Empty file.
1 change: 1 addition & 0 deletions tests/resources/emptytable.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Binary file added tests/resources/image/snapshot_empty_heatmap.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 12 additions & 2 deletions tests/safeds/data/tabular/containers/_table/test_add_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,22 @@
Column("col3", [0, -1, -2]),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2]}),
),
(
Table({}),
Column("col3", []),
Table({"col3": []}),
),
(
Table({}),
Column("col3", [1]),
Table({"col3": [1]}),
),
],
ids=["String", "Integer"],
ids=["String", "Integer", "empty with empty column", "empty with filled column"],
)
def test_should_add_column(table1: Table, column: Column, expected: Table) -> None:
table1 = table1.add_column(column)
assert table1.schema == expected.schema
# assert table1.schema == expected.schema
assert table1 == expected


Expand Down
23 changes: 20 additions & 3 deletions tests/safeds/data/tabular/containers/_table/test_add_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,22 @@
[Column("col3", [0, -1, -2]), Column("col4", ["a", "b", "c"])],
Table({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
),
(
Table({}),
[Column("col3", []), Column("col4", [])],
Table({"col3": [], "col4": []}),
),
(
Table({}),
[Column("col3", [1]), Column("col4", [2])],
Table({"col3": [1], "col4": [2]}),
),
],
ids=["add 2 columns"],
ids=["add 2 columns", "empty with empty column", "empty with filled column"],
)
def test_should_add_columns(table1: Table, columns: list[Column], expected: Table) -> None:
table1 = table1.add_columns(columns)
assert table1.schema == expected.schema
# assert table1.schema == expected.schema
assert table1 == expected


Expand All @@ -28,8 +38,15 @@ def test_should_add_columns(table1: Table, columns: list[Column], expected: Tabl
Table({"col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
),
(Table(), Table({"col1": [1, 2], "col2": [60, 2]}), Table({"col1": [1, 2], "col2": [60, 2]})),
(
Table({"col1": [1, 2], "col2": [60, 2]}),
Table(),
Table({"col1": [1, 2], "col2": [60, 2]}),
),
(Table({"yeet": [], "col": []}), Table({"gg": []}), Table({"yeet": [], "col": [], "gg": []})),
],
ids=["add a table with 2 columns"],
ids=["add a table with 2 columns", "empty add filled", "filled add empty", "rowless"],
)
def test_should_add_columns_from_table(table1: Table, table2: Table, expected: Table) -> None:
table1 = table1.add_columns(table2)
Expand Down
23 changes: 16 additions & 7 deletions tests/safeds/data/tabular/containers/_table/test_add_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,30 @@


@pytest.mark.parametrize(
("table", "row"),
("table", "row", "expected"),
[
(Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Row({"col1": 5, "col2": 6})),
(
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Row({"col1": 5, "col2": 6}),
Table({"col1": [1, 2, 1, 5], "col2": [1, 2, 4, 6]}),
),
(Table({"col2": [], "col4": []}), Row({"col2": 5, "col4": 6}), Table({"col2": [5], "col4": [6]})),
(Table(), Row({"col2": 5, "col4": 6}), Table({"col2": [5], "col4": [6]})),
],
ids=["added row"],
ids=["add row", "add row to rowless table", "add row to empty table"],
)
def test_should_add_row(table: Table, row: Row) -> None:
def test_should_add_row(table: Table, row: Row, expected: Table) -> None:
table = table.add_row(row)
assert table.number_of_rows == 4
assert table.get_row(3) == row
assert table.schema == row._schema
assert table == expected


def test_should_raise_error_if_row_schema_invalid() -> None:
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
row = Row({"col1": 5, "col2": "Hallo"})
with raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
table1.add_row(row)


def test_should_raise_schema_mismatch() -> None:
with raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
Table({"a": [], "b": []}).add_row(Row({"beer": None, "rips": None}))
32 changes: 30 additions & 2 deletions tests/safeds/data/tabular/containers/_table/test_add_rows.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
from _pytest.python_api import raises
from safeds.data.tabular.containers import Row, Table
from safeds.exceptions import SchemaMismatchError

Expand All @@ -11,8 +12,13 @@
[Row({"col1": "d", "col2": 6}), Row({"col1": "e", "col2": 8})],
Table({"col1": ["a", "b", "c", "d", "e"], "col2": [1, 2, 4, 6, 8]}),
),
(
Table(),
[Row({"col1": "d", "col2": 6}), Row({"col1": "e", "col2": 8})],
Table({"col1": ["d", "e"], "col2": [6, 8]}),
),
],
ids=["Rows with string and integer values"],
ids=["Rows with string and integer values", "empty"],
)
def test_should_add_rows(table1: Table, rows: list[Row], table2: Table) -> None:
table1 = table1.add_rows(rows)
Expand All @@ -28,8 +34,23 @@ def test_should_add_rows(table1: Table, rows: list[Row], table2: Table) -> None:
Table({"col1": [5, 7], "col2": [6, 8]}),
Table({"col1": [1, 2, 1, 5, 7], "col2": [1, 2, 4, 6, 8]}),
),
(
Table({"col1": [2], "yikes": [5]}),
Table(),
Table({"col1": [2], "yikes": [5]}),
),
(
Table(),
Table({"col1": [2], "yikes": [5]}),
Table({"col1": [2], "yikes": [5]}),
),
(
Table({"col1": [], "yikes": []}),
Table({"col1": [], "yikes": []}),
Table({"col1": [], "yikes": []}),
),
],
ids=["Rows from table"],
ids=["Rows from table", "add empty to table", "add on empty table", "rowless"],
)
def test_should_add_rows_from_table(table1: Table, table2: Table, expected: Table) -> None:
table1 = table1.add_rows(table2)
Expand All @@ -42,3 +63,10 @@ def test_should_raise_error_if_row_schema_invalid() -> None:
row = [Row({"col1": 2, "col2": 4}), Row({"col1": 5, "col2": "Hallo"})]
with pytest.raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
table1.add_rows(row)


def test_should_raise_schema_mismatch() -> None:
with raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
Table({"a": [], "b": []}).add_rows([Row({"a": None, "b": None}), Row({"beer": None, "rips": None})])
with raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
Table({"a": [], "b": []}).add_rows([Row({"beer": None, "rips": None}), Row({"a": None, "b": None})])
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
("table", "expected"),
[
(Table({"col1": [1], "col2": [1]}), ["col1", "col2"]),
(Table({"col": [], "gg": []}), ["col", "gg"]),
(Table(), []),
],
ids=["Integer", "empty"],
ids=["Integer", "rowless", "empty"],
)
def test_should_compare_column_names(table: Table, expected: list) -> None:
assert table.column_names == expected
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,14 @@
1,
Table._from_pandas_dataframe(pd.DataFrame(), Schema({"col1": Integer(), "col2": Integer()})),
),
(
Table(),
"col1",
1,
Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
),
],
ids=["filter for col1 = 1", "empty table"],
ids=["filter for col1 = 1", "no finding", "empty table"],
)
def test_should_filter_rows(table1: Table, filter_column: str, filter_value: ColumnType, table2: Table) -> None:
table1 = table1.filter_rows(lambda row: row.get_value(filter_column) == filter_value)
Expand Down

0 comments on commit 686c2e7

Please sign in to comment.