-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: subclass of `Table` that can include column descriptions (#43)
Closes #41. ### Summary of Changes Create a new class `ExampleTable` that is a subclass of `Table` and can additionally store column descriptions. This allows the example data to be self-contained. The original `describe_XY` methods are removed. --------- Co-authored-by: lars-reimann <lars-reimann@users.noreply.github.com>
- Loading branch information
1 parent
8962627
commit 56aff62
Showing
13 changed files
with
189 additions
and
104 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
from ._house_sales import describe_house_sales_columns, load_house_sales | ||
from ._titanic import describe_titanic_columns, load_titanic | ||
from ._house_sales import load_house_sales | ||
from ._titanic import load_titanic |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
from ._house_sales import describe_house_sales_columns, load_house_sales | ||
from ._house_sales import load_house_sales |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,69 +1,48 @@ | ||
import os | ||
|
||
from safeds.data.tabular.containers import Table | ||
from safeds_examples.tabular.containers import ExampleTable | ||
|
||
_path = os.path.join(os.path.dirname(__file__), "data", "house_sales.csv") | ||
|
||
|
||
def load_house_sales() -> Table: | ||
def load_house_sales() -> ExampleTable: | ||
""" | ||
Loads the "House Sales" dataset. | ||
Returns | ||
------- | ||
Table | ||
ExampleTable | ||
The "House Sales" dataset. | ||
""" | ||
|
||
return Table.from_csv_file(_path) | ||
|
||
|
||
def describe_house_sales_columns() -> Table: | ||
""" | ||
Returns a `Table` with two columns `"Name"` and `"Description"`, containing the name of a column in the "House | ||
Sales" dataset and its description respectively. | ||
Returns | ||
------- | ||
Table | ||
A `Table` with names and descriptions for all columns of the "House Sales" dataset. | ||
""" | ||
|
||
return Table( | ||
[ | ||
{"Name": "id", "Description": "A unique identifier"}, | ||
{"Name": "year", "Description": "Year of sale"}, | ||
{"Name": "month", "Description": "Month of sale"}, | ||
{"Name": "day", "Description": "Day of sale"}, | ||
{"Name": "zipcode", "Description": "Zipcode"}, | ||
{"Name": "latitude", "Description": "Latitude"}, | ||
{"Name": "longitude", "Description": "Longitude"}, | ||
{"Name": "sqft_lot", "Description": "Lot area in square feet"}, | ||
{"Name": "sqft_living", "Description": "Interior living space in square feet"}, | ||
{"Name": "sqft_above", "Description": "Interior living space above ground in square feet"}, | ||
{"Name": "sqft_basement", "Description": "Interior living space below ground in square feet"}, | ||
{"Name": "floors", "Description": "Number of floors"}, | ||
{"Name": "bedrooms", "Description": "Number of bedrooms"}, | ||
{ | ||
"Name": "bathrooms", | ||
"Description": "Number of bathrooms.\n\n" | ||
"Fractional values indicate that components (toilet/sink/shower/bathtub) are missing.", | ||
}, | ||
{"Name": "waterfront", "Description": "Whether the building overlooks a waterfront (0 = no, 1 = yes)"}, | ||
{"Name": "view", "Description": "Rating of the view (1 to 5, higher is better)"}, | ||
{"Name": "condition", "Description": "Rating of the condition of the house (1 to 5, higher is better)"}, | ||
{"Name": "grade", "Description": "Rating of building construction and design (1 to 13, higher is better)"}, | ||
{"Name": "year_built", "Description": "Year the house was built"}, | ||
{ | ||
"Name": "year_renovated", | ||
"Description": "Year the house was last renovated.\n\n" | ||
"A value of 0 indicates that it was never renovated.", | ||
}, | ||
{"Name": "sqft_lot_15nn", "Description": "Lot area of the 15 nearest neighbors in square feet"}, | ||
{ | ||
"Name": "sqft_living_15nn", | ||
"Description": "Interior living space of the 15 nearest neighbors in square feet", | ||
}, | ||
{"Name": "price", "Description": "Price the house sold for in USD"}, | ||
] | ||
return ExampleTable( | ||
Table.from_csv_file(_path), | ||
column_descriptions={ | ||
"id": "A unique identifier", | ||
"year": "Year of sale", | ||
"month": "Month of sale", | ||
"day": "Day of sale", | ||
"zipcode": "Zipcode", | ||
"latitude": "Latitude", | ||
"longitude": "Longitude", | ||
"sqft_lot": "Lot area in square feet", | ||
"sqft_living": "Interior living space in square feet", | ||
"sqft_above": "Interior living space above ground in square feet", | ||
"sqft_basement": "Interior living space below ground in square feet", | ||
"floors": "Number of floors", | ||
"bedrooms": "Number of bedrooms", | ||
"bathrooms": "Number of bathrooms.\n\n" | ||
"Fractional values indicate that components (toilet/sink/shower/bathtub) are missing.", | ||
"waterfront": "Whether the building overlooks a waterfront (0 = no, 1 = yes)", | ||
"view": "Rating of the view (1 to 5, higher is better)", | ||
"condition": "Rating of the condition of the house (1 to 5, higher is better)", | ||
"grade": "Rating of building construction and design (1 to 13, higher is better)", | ||
"year_built": "Year the house was built", | ||
"year_renovated": "Year the house was last renovated.\n\n" | ||
"A value of 0 indicates that it was never renovated.", | ||
"sqft_lot_15nn": "Lot area of the 15 nearest neighbors in square feet", | ||
"sqft_living_15nn": "Interior living space of the 15 nearest neighbors in square feet", | ||
"price": "Price the house sold for in USD", | ||
}, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
from ._titanic import describe_titanic_columns, load_titanic | ||
from ._titanic import load_titanic |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,35 @@ | ||
import os | ||
|
||
from safeds.data.tabular.containers import Table | ||
from safeds_examples.tabular.containers import ExampleTable | ||
|
||
_path = os.path.join(os.path.dirname(__file__), "data", "titanic.csv") | ||
|
||
|
||
def load_titanic() -> Table: | ||
def load_titanic() -> ExampleTable: | ||
""" | ||
Loads the "Titanic" dataset. | ||
Returns | ||
------- | ||
Table | ||
ExampleTable | ||
The "Titanic" dataset. | ||
""" | ||
|
||
return Table.from_csv_file(_path) | ||
|
||
|
||
def describe_titanic_columns() -> Table: | ||
""" | ||
Returns a `Table` with two columns `"Name"` and `"Description"`, containing the name of a column in the "Titanic" | ||
dataset and its description respectively. | ||
Returns | ||
------- | ||
Table | ||
A `Table` with names and descriptions for all columns of the "Titanic" dataset. | ||
""" | ||
|
||
return Table( | ||
[ | ||
{"Name": "id", "Description": "A unique identifier"}, | ||
{"Name": "name", "Description": "Name of the passenger"}, | ||
{"Name": "sex", "Description": "Sex of the passenger"}, | ||
{"Name": "age", "Description": "Age of the passenger at the time of the accident"}, | ||
{"Name": "siblings_spouses", "Description": "Number of siblings or spouses aboard"}, | ||
{"Name": "parents_children", "Description": "Number of parents or children aboard"}, | ||
{"Name": "ticket", "Description": "Ticket number"}, | ||
{"Name": "travel_class", "Description": "Travel class (1 = first, 2 = second, 3 = third)"}, | ||
{"Name": "fare", "Description": "Fare"}, | ||
{"Name": "cabin", "Description": "Cabin number"}, | ||
{"Name": "port_embarked", "Description": "Port of embarkation"}, | ||
{"Name": "survived", "Description": "Whether the passenger survived the accident"}, | ||
] | ||
return ExampleTable( | ||
Table.from_csv_file(_path), | ||
column_descriptions={ | ||
"id": "A unique identifier", | ||
"name": "Name of the passenger", | ||
"sex": "Sex of the passenger", | ||
"age": "Age of the passenger at the time of the accident", | ||
"siblings_spouses": "Number of siblings or spouses aboard", | ||
"parents_children": "Number of parents or children aboard", | ||
"ticket": "Ticket number", | ||
"travel_class": "Travel class (1 = first, 2 = second, 3 = third)", | ||
"fare": "Fare", | ||
"cabin": "Cabin number", | ||
"port_embarked": "Port of embarkation", | ||
"survived": "Whether the passenger survived the accident (0 = no, 1 = yes)", | ||
}, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from ._example_table import ExampleTable |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from safeds.data.tabular.containers import Table | ||
from safeds.exceptions import UnknownColumnNameError | ||
|
||
|
||
class ExampleTable(Table):
    """
    A `Table` with descriptions for its columns.

    Parameters
    ----------
    table : Table
        The table.
    column_descriptions : dict[str, str]
        A dictionary mapping column names to their descriptions. Columns without an entry are treated as having an
        empty description. The dictionary is copied, so later changes to it do not affect this table.

    Raises
    ------
    UnknownColumnNameError
        If a column name in `column_descriptions` does not exist in `table`.
    """

    def __init__(self, table: Table, column_descriptions: dict[str, str]) -> None:
        # Reject descriptions for columns the table does not have. The offending names are reported in the order
        # in which they appear in `column_descriptions`, so the error is reproducible between runs.
        known_column_names = set(table.get_column_names())
        invalid_column_names = [name for name in column_descriptions if name not in known_column_names]
        if invalid_column_names:
            raise UnknownColumnNameError(invalid_column_names)

        super().__init__(table._data, table.schema)

        # Keep a private copy: if the caller mutated the dictionary it passed in, it could otherwise add
        # descriptions for unknown columns after the check above has already run.
        self._descriptions = dict(column_descriptions)

    @property
    def column_descriptions(self) -> Table:
        """
        Return a `Table` with two columns `"Name"` and `"Description"`, containing the name of a column and its
        description respectively.

        Returns
        -------
        Table
            One row per column of this table, in column order. Columns without a description get an empty string.
        """

        # Every name comes from `get_column_names()`, so the existence check in `get_column_description` is
        # redundant here; looking the description up directly avoids recomputing the column names per column.
        return Table(
            [
                {"Name": column_name, "Description": self._descriptions.get(column_name, "")}
                for column_name in self.get_column_names()
            ]
        )

    def get_column_description(self, column_name: str) -> str:
        """
        Get the description of a column. If no description exists, an empty string is returned.

        Parameters
        ----------
        column_name : str
            The name of the column.

        Returns
        -------
        description : str
            The description of the column.

        Raises
        ------
        UnknownColumnNameError
            If no column with the given name exists.
        """

        if column_name not in self.get_column_names():
            raise UnknownColumnNameError([column_name])

        return self._descriptions.get(column_name, "")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
53 changes: 53 additions & 0 deletions
53
tests/safeds_examples/tabular/containers/test_example_table.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import pytest | ||
from safeds.data.tabular.containers import Column, Table | ||
from safeds.exceptions import UnknownColumnNameError | ||
from safeds_examples.tabular.containers import ExampleTable | ||
|
||
|
||
@pytest.fixture
def example_table() -> ExampleTable:
    """Provide a table with columns "a" and "b" in which only "a" has a description."""
    columns = [
        Column("a", [1, 2, 3]),
        Column("b", [4, 5, 6]),
    ]
    base_table = Table.from_columns(columns)
    return ExampleTable(base_table, column_descriptions={"a": "The first column"})
|
||
|
||
class TestInit:
    """Tests for the `ExampleTable` constructor."""

    def test_should_raise_if_column_does_not_exist(self) -> None:
        """A description for a column that is not part of the table must be rejected."""
        # Build the input outside of `pytest.raises`, so the test can only pass if the `ExampleTable`
        # constructor itself raises the error.
        table = Table.from_columns(
            [
                Column("a", [1, 2, 3]),
                Column("b", [4, 5, 6]),
            ]
        )

        with pytest.raises(UnknownColumnNameError):
            ExampleTable(table, column_descriptions={"c": "The first column"})
|
||
|
||
class TestColumnDescriptions:
    """Tests for the `column_descriptions` property."""

    def test_should_map_column_names_to_descriptions(self, example_table: ExampleTable) -> None:
        """Each column is listed by name; the column without a description gets an empty string."""
        expected = Table.from_columns(
            [
                Column("Name", ["a", "b"]),
                Column("Description", ["The first column", ""]),
            ]
        )

        assert example_table.column_descriptions == expected
|
||
|
||
class TestGetColumnDescription:
    """Tests for `ExampleTable.get_column_description`."""

    def test_should_return_description_for_column(self, example_table: ExampleTable) -> None:
        """A column with a description returns that description."""
        description = example_table.get_column_description("a")

        assert description == "The first column"

    def test_should_return_empty_string_if_no_description_exists(self, example_table: ExampleTable) -> None:
        """A column without a description returns an empty string."""
        description = example_table.get_column_description("b")

        assert description == ""

    def test_should_raise_error_if_column_does_not_exist(self, example_table: ExampleTable) -> None:
        """Asking for a column that is not part of the table raises an error."""
        with pytest.raises(UnknownColumnNameError):
            example_table.get_column_description("c")