Skip to content

Commit

Permalink
feat: usable constructor for TaggedTable (#299)
Browse files Browse the repository at this point in the history
Closes #293.

### Summary of Changes

Change constructor of `TaggedTable` to be more usable. The usual way to
get a `TaggedTable` is still to call the `tag_columns` method of `Table`.

---------

Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
  • Loading branch information
lars-reimann and megalinter-bot committed May 7, 2023
1 parent ed604f6 commit 01c3ad9
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 43 deletions.
33 changes: 17 additions & 16 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,21 @@ class Table:
| [from_dict][safeds.data.tabular.containers._table.Table.from_dict] | Create a table from a dictionary. |
| [from_columns][safeds.data.tabular.containers._table.Table.from_columns] | Create a table from a list of columns. |
| [from_rows][safeds.data.tabular.containers._table.Table.from_rows] | Create a table from a list of rows. |
Parameters
----------
data : Mapping[str, Sequence[Any]] | None
The data. If None, an empty table is created.
Raises
------
ColumnLengthMismatchError
If columns have different lengths.
Examples
--------
>>> from safeds.data.tabular.containers import Table
>>> table = Table({"a": [1, 2, 3], "b": [4, 5, 6]})
"""

# ------------------------------------------------------------------------------------------------------------------
Expand All @@ -62,8 +77,6 @@ def from_csv_file(path: str | Path) -> Table:
"""
Read data from a CSV file into a table.
This table is not modified.
Parameters
----------
path : str | Path
Expand Down Expand Up @@ -91,8 +104,6 @@ def from_excel_file(path: str | Path) -> Table:
"""
Read data from an Excel file into a table.
This table is not modified.
Parameters
----------
path : str | Path
Expand Down Expand Up @@ -122,8 +133,6 @@ def from_json_file(path: str | Path) -> Table:
"""
Read data from a JSON file into a table.
This table is not modified.
Parameters
----------
path : str | Path
Expand Down Expand Up @@ -151,8 +160,6 @@ def from_dict(data: dict[str, list[Any]]) -> Table:
"""
Create a table from a dictionary that maps column names to column values.
This table is not modified.
Parameters
----------
data : dict[str, list[Any]]
Expand All @@ -175,8 +182,6 @@ def from_columns(columns: list[Column]) -> Table:
"""
Return a table created from a list of columns.
This table is not modified.
Parameters
----------
columns : list[Column]
Expand Down Expand Up @@ -208,8 +213,6 @@ def from_rows(rows: list[Row]) -> Table:
"""
Return a table created from a list of rows.
This table is not modified.
Parameters
----------
rows : list[Row]
Expand Down Expand Up @@ -245,8 +248,6 @@ def _from_pandas_dataframe(data: pd.DataFrame, schema: Schema | None = None) ->
"""
Create a table from a `pandas.DataFrame`.
This table is not modified.
Parameters
----------
data : pd.DataFrame
Expand Down Expand Up @@ -1070,7 +1071,7 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None)
----------
target_name : str
Name of the target column.
feature_names : Optional[list[str]]
feature_names : list[str] | None
Names of the feature columns. If None, all columns except the target column are used.
Returns
Expand All @@ -1080,7 +1081,7 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None)
"""
from ._tagged_table import TaggedTable

return TaggedTable(self._data, self._schema, target_name, feature_names)
return TaggedTable._from_table(self, target_name, feature_names)

def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Table:
"""
Expand Down
125 changes: 114 additions & 11 deletions src/safeds/data/tabular/containers/_tagged_table.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import pandas as pd
from __future__ import annotations

from typing import TYPE_CHECKING

from safeds.data.tabular.containers import Column, Table
from safeds.data.tabular.typing import Schema

if TYPE_CHECKING:
from collections.abc import Mapping, Sequence
from typing import Any


class TaggedTable(Table):
Expand All @@ -10,30 +15,128 @@ class TaggedTable(Table):
Parameters
----------
data : Iterable
data : Mapping[str, Sequence[Any]]
The data.
target_name : str
Name of the target column.
feature_names : Optional[list[str]]
feature_names : list[str] | None
Names of the feature columns. If None, all columns except the target column are used.
schema : Optional[Schema]
The schema of the table. If not specified, the schema will be inferred from the data.
Raises
------
ColumnLengthMismatchError
If columns have different lengths.
ValueError
If the target column is also a feature column.
ValueError
If no feature columns are specified.
Examples
--------
>>> from safeds.data.tabular.containers import Table, TaggedTable
>>> table = Table({"col1": ["a", "b"], "col2": [1, 2]})
>>> tagged_table = table.tag_columns("col2", ["col1"])
"""

# ------------------------------------------------------------------------------------------------------------------
# Creation
# ------------------------------------------------------------------------------------------------------------------

@staticmethod
def _from_table(
    table: Table,
    target_name: str,
    feature_names: list[str] | None = None,
) -> TaggedTable:
    """
    Create a tagged table from a table.

    The tagged table takes its data and schema from `table`; `__init__` is not invoked.

    Parameters
    ----------
    table : Table
        The table.
    target_name : str
        Name of the target column.
    feature_names : list[str] | None
        Names of the feature columns. If None, all columns except the target column are used.

    Returns
    -------
    tagged_table : TaggedTable
        The created table.

    Raises
    ------
    ValueError
        If the target column is also a feature column.
    ValueError
        If no feature columns are specified.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table, TaggedTable
    >>> table = Table({"col1": ["a", "b", "c", "a"], "col2": [1, 2, 3, 4]})
    >>> tagged_table = TaggedTable._from_table(table, "col2", ["col1"])
    """
    # If no feature names are specified, use all columns except the target column.
    # A fresh list is built instead of calling `.remove()` on the object returned by
    # `table.column_names`, so this never mutates a list owned by the table or its schema.
    if feature_names is None:
        feature_names = [name for name in table.column_names if name != target_name]

    # Validate inputs
    if target_name in feature_names:
        raise ValueError(f"Column '{target_name}' cannot be both feature and target.")
    if not feature_names:
        raise ValueError("At least one feature column must be specified.")

    # Create the result without running __init__, which expects a mapping of column data
    # rather than an existing table.
    result = object.__new__(TaggedTable)

    result._data = table._data
    result._schema = table.schema
    result._features = result.keep_only_columns(feature_names)
    result._target = result.get_column(target_name)

    return result

# ------------------------------------------------------------------------------------------------------------------
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

# noinspection PyMissingConstructor
def __init__(
self,
data: pd.DataFrame,
schema: Schema,
data: Mapping[str, Sequence[Any]],
target_name: str,
feature_names: list[str] | None = None,
):
self._data = data
self._schema = schema
"""
Create a tagged table from a mapping of column names to their values.
Parameters
----------
data : Mapping[str, Sequence[Any]]
The data.
target_name : str
Name of the target column.
feature_names : list[str] | None
Names of the feature columns. If None, all columns except the target column are used.
Raises
------
ColumnLengthMismatchError
If columns have different lengths.
ValueError
If the target column is also a feature column.
ValueError
If no feature columns are specified.
Examples
--------
>>> from safeds.data.tabular.containers import TaggedTable
>>> table = TaggedTable({"a": [1, 2, 3], "b": [4, 5, 6]}, "b", ["a"])
"""
super().__init__(data)

# If no feature names are specified, use all columns except the target column
if feature_names is None:
Expand Down
59 changes: 43 additions & 16 deletions tests/safeds/data/tabular/containers/test_tagged_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,71 @@


@pytest.fixture()
def table() -> Table:
return Table(
{
"A": [1, 4],
"B": [2, 5],
"C": [3, 6],
"T": [0, 1],
},
)
def data() -> dict[str, list[int]]:
return {
"A": [1, 4],
"B": [2, 5],
"C": [3, 6],
"T": [0, 1],
}


@pytest.fixture()
def table(data: dict[str, list[int]]) -> Table:
    """Build a plain `Table` from the column data supplied by the `data` fixture."""
    plain_table = Table(data)
    return plain_table


@pytest.fixture()
def tagged_table(table: Table) -> TaggedTable:
    """Tag the `table` fixture with "T" as the target column, passing no explicit feature names."""
    tagged = table.tag_columns(target_name="T")
    return tagged


class TestInit:
class TestFromTable:
def test_should_raise_if_a_feature_does_not_exist(self, table: Table) -> None:
with pytest.raises(UnknownColumnNameError):
table.tag_columns(target_name="T", feature_names=["A", "B", "C", "D"])
TaggedTable._from_table(table, target_name="T", feature_names=["A", "B", "C", "D"])

def test_should_raise_if_target_does_not_exist(self, table: Table) -> None:
with pytest.raises(UnknownColumnNameError):
table.tag_columns(target_name="D")
TaggedTable._from_table(table, target_name="D")

def test_should_raise_if_features_and_target_overlap(self, table: Table) -> None:
with pytest.raises(ValueError, match="Column 'A' cannot be both feature and target."):
table.tag_columns(target_name="A", feature_names=["A", "B", "C"])
TaggedTable._from_table(table, target_name="A", feature_names=["A", "B", "C"])

def test_should_raise_if_features_are_empty_explicitly(self, table: Table) -> None:
with pytest.raises(ValueError, match="At least one feature column must be specified."):
table.tag_columns(target_name="A", feature_names=[])
TaggedTable._from_table(table, target_name="A", feature_names=[])

def test_should_raise_if_features_are_empty_implicitly(self, table: Table) -> None:
def test_should_raise_if_features_are_empty_implicitly(self) -> None:
table = Table({"A": [1, 4]})

with pytest.raises(ValueError, match="At least one feature column must be specified."):
table.tag_columns(target_name="A")
TaggedTable._from_table(table, target_name="A")


class TestInit:
    """Input validation of the `TaggedTable` constructor when called with raw column data."""

    def test_should_raise_if_a_feature_does_not_exist(self, data: dict[str, list[int]]) -> None:
        # "D" is not one of the columns in the fixture data.
        requested_features = ["A", "B", "C", "D"]

        with pytest.raises(UnknownColumnNameError):
            TaggedTable(data, target_name="T", feature_names=requested_features)

    def test_should_raise_if_target_does_not_exist(self, data: dict[str, list[int]]) -> None:
        # "D" is not one of the columns in the fixture data.
        missing_target = "D"

        with pytest.raises(UnknownColumnNameError):
            TaggedTable(data, target_name=missing_target)

    def test_should_raise_if_features_and_target_overlap(self, data: dict[str, list[int]]) -> None:
        expected_message = "Column 'A' cannot be both feature and target."

        # "A" is requested both as the target and as one of the features.
        with pytest.raises(ValueError, match=expected_message):
            TaggedTable(data, target_name="A", feature_names=["A", "B", "C"])

    def test_should_raise_if_features_are_empty_explicitly(self, data: dict[str, list[int]]) -> None:
        expected_message = "At least one feature column must be specified."
        no_features: list[str] = []

        with pytest.raises(ValueError, match=expected_message):
            TaggedTable(data, target_name="A", feature_names=no_features)

    def test_should_raise_if_features_are_empty_implicitly(self) -> None:
        expected_message = "At least one feature column must be specified."
        # With a single column used as the target, no column is left to serve as a feature.
        single_column_data = {"A": [1, 4]}

        with pytest.raises(ValueError, match=expected_message):
            TaggedTable(single_column_data, target_name="A")


class TestFeatures:
Expand Down

0 comments on commit 01c3ad9

Please sign in to comment.