Skip to content

Commit

Permalink
feat: subclass of Table that can include column descriptions (#43)
Browse files Browse the repository at this point in the history
Closes #41.

### Summary of Changes

Create a new class `ExampleTable` that is a subclass of `Table` and can
additionally store column descriptions. This allows the example data to
be self-contained. The original module-level `describe_XY` functions are removed.

---------

Co-authored-by: lars-reimann <lars-reimann@users.noreply.github.com>
  • Loading branch information
lars-reimann and lars-reimann committed Mar 29, 2023
1 parent 8962627 commit 56aff62
Show file tree
Hide file tree
Showing 13 changed files with 189 additions and 104 deletions.
8 changes: 3 additions & 5 deletions docs/examples/house_sales.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@
"\n",
"disable_warnings()\n",
"\n",
"from safeds_examples.tabular import describe_house_sales_columns\n",
"from safeds_examples.tabular import load_house_sales\n",
"from display_column_description import display_column_descriptions\n",
"\n",
"house_sales_description = describe_house_sales_columns()\n",
"house_sales = load_house_sales()\n",
"house_sales_description = house_sales.column_descriptions\n",
"display_column_descriptions(house_sales_description)"
],
"metadata": {
Expand All @@ -53,9 +54,6 @@
"execution_count": null,
"outputs": [],
"source": [
"from safeds_examples.tabular import load_house_sales\n",
"\n",
"house_sales = load_house_sales()\n",
"house_sales.slice(end=10)"
],
"metadata": {
Expand Down
8 changes: 3 additions & 5 deletions docs/examples/titanic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@
"\n",
"disable_warnings()\n",
"\n",
"from safeds_examples.tabular import describe_titanic_columns\n",
"from safeds_examples.tabular import load_titanic\n",
"from display_column_description import display_column_descriptions\n",
"\n",
"titanic_description = describe_titanic_columns()\n",
"titanic = load_titanic()\n",
"titanic_description = titanic.column_descriptions\n",
"display_column_descriptions(titanic_description)"
],
"metadata": {
Expand All @@ -53,9 +54,6 @@
"execution_count": null,
"outputs": [],
"source": [
"from safeds_examples.tabular import load_titanic\n",
"\n",
"titanic = load_titanic()\n",
"titanic.slice(end=10)"
],
"metadata": {
Expand Down
4 changes: 2 additions & 2 deletions src/safeds_examples/tabular/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from ._house_sales import describe_house_sales_columns, load_house_sales
from ._titanic import describe_titanic_columns, load_titanic
from ._house_sales import load_house_sales
from ._titanic import load_titanic
2 changes: 1 addition & 1 deletion src/safeds_examples/tabular/_house_sales/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from ._house_sales import describe_house_sales_columns, load_house_sales
from ._house_sales import load_house_sales
85 changes: 32 additions & 53 deletions src/safeds_examples/tabular/_house_sales/_house_sales.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,48 @@
import os

from safeds.data.tabular.containers import Table
from safeds_examples.tabular.containers import ExampleTable

_path = os.path.join(os.path.dirname(__file__), "data", "house_sales.csv")


def load_house_sales() -> Table:
def load_house_sales() -> ExampleTable:
"""
Loads the "House Sales" dataset.
Returns
-------
Table
ExampleTable
The "House Sales" dataset.
"""

return Table.from_csv_file(_path)


def describe_house_sales_columns() -> Table:
"""
Returns a `Table` with two columns `"Name"` and `"Description"`, containing the name of a column in the "House
Sales" dataset and its description respectively.
Returns
-------
Table
A `Table` with names and descriptions for all columns of the "House Sales" dataset.
"""

return Table(
[
{"Name": "id", "Description": "A unique identifier"},
{"Name": "year", "Description": "Year of sale"},
{"Name": "month", "Description": "Month of sale"},
{"Name": "day", "Description": "Day of sale"},
{"Name": "zipcode", "Description": "Zipcode"},
{"Name": "latitude", "Description": "Latitude"},
{"Name": "longitude", "Description": "Longitude"},
{"Name": "sqft_lot", "Description": "Lot area in square feet"},
{"Name": "sqft_living", "Description": "Interior living space in square feet"},
{"Name": "sqft_above", "Description": "Interior living space above ground in square feet"},
{"Name": "sqft_basement", "Description": "Interior living space below ground in square feet"},
{"Name": "floors", "Description": "Number of floors"},
{"Name": "bedrooms", "Description": "Number of bedrooms"},
{
"Name": "bathrooms",
"Description": "Number of bathrooms.\n\n"
"Fractional values indicate that components (toilet/sink/shower/bathtub) are missing.",
},
{"Name": "waterfront", "Description": "Whether the building overlooks a waterfront (0 = no, 1 = yes)"},
{"Name": "view", "Description": "Rating of the view (1 to 5, higher is better)"},
{"Name": "condition", "Description": "Rating of the condition of the house (1 to 5, higher is better)"},
{"Name": "grade", "Description": "Rating of building construction and design (1 to 13, higher is better)"},
{"Name": "year_built", "Description": "Year the house was built"},
{
"Name": "year_renovated",
"Description": "Year the house was last renovated.\n\n"
"A value of 0 indicates that it was never renovated.",
},
{"Name": "sqft_lot_15nn", "Description": "Lot area of the 15 nearest neighbors in square feet"},
{
"Name": "sqft_living_15nn",
"Description": "Interior living space of the 15 nearest neighbors in square feet",
},
{"Name": "price", "Description": "Price the house sold for in USD"},
]
return ExampleTable(
Table.from_csv_file(_path),
column_descriptions={
"id": "A unique identifier",
"year": "Year of sale",
"month": "Month of sale",
"day": "Day of sale",
"zipcode": "Zipcode",
"latitude": "Latitude",
"longitude": "Longitude",
"sqft_lot": "Lot area in square feet",
"sqft_living": "Interior living space in square feet",
"sqft_above": "Interior living space above ground in square feet",
"sqft_basement": "Interior living space below ground in square feet",
"floors": "Number of floors",
"bedrooms": "Number of bedrooms",
"bathrooms": "Number of bathrooms.\n\n"
"Fractional values indicate that components (toilet/sink/shower/bathtub) are missing.",
"waterfront": "Whether the building overlooks a waterfront (0 = no, 1 = yes)",
"view": "Rating of the view (1 to 5, higher is better)",
"condition": "Rating of the condition of the house (1 to 5, higher is better)",
"grade": "Rating of building construction and design (1 to 13, higher is better)",
"year_built": "Year the house was built",
"year_renovated": "Year the house was last renovated.\n\n"
"A value of 0 indicates that it was never renovated.",
"sqft_lot_15nn": "Lot area of the 15 nearest neighbors in square feet",
"sqft_living_15nn": "Interior living space of the 15 nearest neighbors in square feet",
"price": "Price the house sold for in USD",
},
)
2 changes: 1 addition & 1 deletion src/safeds_examples/tabular/_titanic/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from ._titanic import describe_titanic_columns, load_titanic
from ._titanic import load_titanic
50 changes: 19 additions & 31 deletions src/safeds_examples/tabular/_titanic/_titanic.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,35 @@
import os

from safeds.data.tabular.containers import Table
from safeds_examples.tabular.containers import ExampleTable

_path = os.path.join(os.path.dirname(__file__), "data", "titanic.csv")


def load_titanic() -> Table:
def load_titanic() -> ExampleTable:
"""
Loads the "Titanic" dataset.
Returns
-------
Table
ExampleTable
The "Titanic" dataset.
"""

return Table.from_csv_file(_path)


def describe_titanic_columns() -> Table:
"""
Returns a `Table` with two columns `"Name"` and `"Description"`, containing the name of a column in the "Titanic"
dataset and its description respectively.
Returns
-------
Table
A `Table` with names and descriptions for all columns of the "Titanic" dataset.
"""

return Table(
[
{"Name": "id", "Description": "A unique identifier"},
{"Name": "name", "Description": "Name of the passenger"},
{"Name": "sex", "Description": "Sex of the passenger"},
{"Name": "age", "Description": "Age of the passenger at the time of the accident"},
{"Name": "siblings_spouses", "Description": "Number of siblings or spouses aboard"},
{"Name": "parents_children", "Description": "Number of parents or children aboard"},
{"Name": "ticket", "Description": "Ticket number"},
{"Name": "travel_class", "Description": "Travel class (1 = first, 2 = second, 3 = third)"},
{"Name": "fare", "Description": "Fare"},
{"Name": "cabin", "Description": "Cabin number"},
{"Name": "port_embarked", "Description": "Port of embarkation"},
{"Name": "survived", "Description": "Whether the passenger survived the accident"},
]
return ExampleTable(
Table.from_csv_file(_path),
column_descriptions={
"id": "A unique identifier",
"name": "Name of the passenger",
"sex": "Sex of the passenger",
"age": "Age of the passenger at the time of the accident",
"siblings_spouses": "Number of siblings or spouses aboard",
"parents_children": "Number of parents or children aboard",
"ticket": "Ticket number",
"travel_class": "Travel class (1 = first, 2 = second, 3 = third)",
"fare": "Fare",
"cabin": "Cabin number",
"port_embarked": "Port of embarkation",
"survived": "Whether the passenger survived the accident (0 = no, 1 = yes)",
},
)
1 change: 1 addition & 0 deletions src/safeds_examples/tabular/containers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._example_table import ExampleTable
68 changes: 68 additions & 0 deletions src/safeds_examples/tabular/containers/_example_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from safeds.data.tabular.containers import Table
from safeds.exceptions import UnknownColumnNameError


class ExampleTable(Table):
    """
    A `Table` with descriptions for its columns.

    Parameters
    ----------
    table : Table
        The table.
    column_descriptions : dict[str, str]
        A dictionary mapping column names to their descriptions. Columns of `table` that are not listed here are
        treated as having an empty description.

    Raises
    ------
    UnknownColumnNameError
        If a column name in `column_descriptions` does not exist in `table`.
    """

    def __init__(self, table: Table, column_descriptions: dict[str, str]) -> None:
        # Check that all column names in `column_descriptions` exist in `table`. The names are sorted so that the
        # error payload is deterministic (set iteration order is arbitrary).
        invalid_column_names = set(column_descriptions) - set(table.get_column_names())
        if invalid_column_names:
            raise UnknownColumnNameError(sorted(invalid_column_names))

        super().__init__(table._data, table.schema)

        # Store a copy, so later mutation of the caller's dictionary cannot add names that bypass the check above.
        self._descriptions = dict(column_descriptions)

    @property
    def column_descriptions(self) -> Table:
        """
        Returns a `Table` with two columns `"Name"` and `"Description"`, containing the name of a column and its
        description respectively. Columns without a description get an empty string.
        """

        # The names come straight from this table, so they are known to exist. Looking them up directly avoids
        # re-validating (and re-listing all column names) once per column.
        return Table(
            [
                {"Name": column_name, "Description": self._descriptions.get(column_name, "")}
                for column_name in self.get_column_names()
            ]
        )

    def get_column_description(self, column_name: str) -> str:
        """
        Get the description of a column. If no description exists, an empty string is returned.

        Parameters
        ----------
        column_name : str
            The name of the column.

        Returns
        -------
        description : str
            The description of the column.

        Raises
        ------
        UnknownColumnNameError
            If no column with the given name exists.
        """

        if column_name not in self.get_column_names():
            raise UnknownColumnNameError([column_name])

        return self._descriptions.get(column_name, "")
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import FloatColumnType, IntColumnType, TableSchema
from safeds_examples.tabular import describe_house_sales_columns, load_house_sales
from safeds_examples.tabular import load_house_sales


class TestLoadHouseSales:
Expand Down Expand Up @@ -53,9 +53,9 @@ def test_columns_with_missing_values(self, house_sales: Table) -> None:
assert actual_column_names == set()


class TestDescribeHouseSalesColumns:
class TestColumnDescriptions:
def test_all_columns_have_descriptions(self) -> None:
house_sales = load_house_sales()
descriptions = describe_house_sales_columns()
descriptions = house_sales.column_descriptions

assert set(descriptions.get_column("Name")._data) == set(house_sales.get_column_names())
6 changes: 3 additions & 3 deletions tests/safeds_examples/tabular/_titanic/test_titanic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
StringColumnType,
TableSchema,
)
from safeds_examples.tabular import describe_titanic_columns, load_titanic
from safeds_examples.tabular import load_titanic


class TestLoadTitanic:
Expand Down Expand Up @@ -47,9 +47,9 @@ def test_columns_with_missing_values(self, titanic: Table) -> None:
assert actual_column_names == {"age", "port_embarked", "fare", "cabin"}


class TestDescribeTitanicColumns:
class TestColumnDescriptions:
def test_all_columns_have_descriptions(self) -> None:
titanic = load_titanic()
descriptions = describe_titanic_columns()
descriptions = titanic.column_descriptions

assert set(descriptions.get_column("Name")._data) == set(titanic.get_column_names())
Empty file.
53 changes: 53 additions & 0 deletions tests/safeds_examples/tabular/containers/test_example_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import pytest
from safeds.data.tabular.containers import Column, Table
from safeds.exceptions import UnknownColumnNameError
from safeds_examples.tabular.containers import ExampleTable


@pytest.fixture
def example_table() -> ExampleTable:
    """Provide a two-column `ExampleTable` in which only column "a" has a description."""
    columns = [
        Column("a", [1, 2, 3]),
        Column("b", [4, 5, 6]),
    ]
    descriptions = {"a": "The first column"}

    return ExampleTable(Table.from_columns(columns), column_descriptions=descriptions)


class TestInit:
    """Tests for the validation performed by the `ExampleTable` constructor."""

    def test_should_raise_if_column_does_not_exist(self) -> None:
        # The table only has columns "a" and "b", so describing "c" must be rejected.
        table = Table.from_columns(
            [
                Column("a", [1, 2, 3]),
                Column("b", [4, 5, 6]),
            ]
        )
        descriptions = {"c": "The first column"}

        with pytest.raises(UnknownColumnNameError):
            ExampleTable(table, column_descriptions=descriptions)


class TestColumnDescriptions:
    """Tests for the `column_descriptions` property of `ExampleTable`."""

    def test_should_map_column_names_to_descriptions(self, example_table: ExampleTable) -> None:
        # Column "b" has no description in the fixture, so an empty string is expected for it.
        names = Column("Name", ["a", "b"])
        descriptions = Column("Description", ["The first column", ""])
        expected = Table.from_columns([names, descriptions])

        assert example_table.column_descriptions == expected


class TestGetColumnDescription:
    """Tests for `ExampleTable.get_column_description`."""

    def test_should_return_description_for_column(self, example_table: ExampleTable) -> None:
        description = example_table.get_column_description("a")

        assert description == "The first column"

    def test_should_return_empty_string_if_no_description_exists(self, example_table: ExampleTable) -> None:
        # Column "b" exists in the fixture but was given no description.
        description = example_table.get_column_description("b")

        assert description == ""

    def test_should_raise_error_if_column_does_not_exist(self, example_table: ExampleTable) -> None:
        # Column "c" is not part of the fixture table at all.
        with pytest.raises(UnknownColumnNameError):
            example_table.get_column_description("c")

0 comments on commit 56aff62

Please sign in to comment.