feat: Raise error if an untagged table is used instead of a `TaggedTable` (#234)

Closes #192.

### Summary of Changes

Added `UntaggedTableError`, which is raised when an untagged table is used instead of a `TaggedTable`.
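
In practice this turns a confusing downstream failure into an immediate, descriptive error. A minimal sketch of the distinction the new guard draws, using only containers that appear in this diff:

```python
from safeds.data.tabular.containers import Table, TaggedTable

table = Table.from_dict({"a": [1.0, 0.0], "b": [0.0, 1.0], "target": [1, 0]})
tagged = table.tag_columns(target_name="target")

print(isinstance(table, TaggedTable))   # False -> fit(), accuracy(), ... now raise UntaggedTableError
print(isinstance(tagged, TaggedTable))  # True  -> methods behave as before
```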

---------

Co-authored-by: sibre28 <86068340+sibre28@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
4 people committed Apr 28, 2023
1 parent 7f74440 commit 8eea3dd
Showing 7 changed files with 133 additions and 9 deletions.
5 changes: 5 additions & 0 deletions src/safeds/ml/classical/_util_sklearn.py
@@ -8,6 +8,7 @@
     LearningError,
     ModelNotFittedError,
     PredictionError,
+    UntaggedTableError,
 )


@@ -27,7 +28,11 @@ def fit(model: Any, tagged_table: TaggedTable) -> None:
     ------
     LearningError
         If the tagged table contains invalid values or if the training failed.
+    UntaggedTableError
+        If the table is untagged.
     """
+    if not isinstance(tagged_table, TaggedTable) and isinstance(tagged_table, Table):
+        raise UntaggedTableError
     try:
         model.fit(
             tagged_table.features._data,
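
A note on the two-part condition: it implies that `TaggedTable` subclasses `Table` (an assumption read off the guard's shape, not stated in this diff). A plain `Table` fails the first check and passes the second, so only genuinely untagged tables get the new error; any other invalid input still falls through to the existing `LearningError` path. A self-contained sketch of the pattern with stand-in classes:

```python
class Table:               # stand-in for safeds' Table, for illustration only
    pass

class TaggedTable(Table):  # assumed subclass relationship
    pass

def guard(data: object) -> None:
    # Mirrors the check in fit(): reject plain tables with a helpful error.
    if not isinstance(data, TaggedTable) and isinstance(data, Table):
        raise TypeError("untagged table")  # plays the role of UntaggedTableError

guard(TaggedTable())  # tagged tables pass the guard
guard("nonsense")     # non-tables also pass here; they fail later in model.fit
guard(Table())        # plain table -> raises immediately with guidance
```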
12 changes: 9 additions & 3 deletions src/safeds/ml/classical/classification/_classifier.py
@@ -1,12 +1,11 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
 
 from sklearn.metrics import accuracy_score as sk_accuracy_score
 
-if TYPE_CHECKING:
-    from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.ml.exceptions import UntaggedTableError
 
 
 class Classifier(ABC):
@@ -87,7 +86,14 @@ def accuracy(self, validation_or_test_set: TaggedTable) -> float:
         -------
         accuracy : float
             The calculated accuracy score, i.e. the percentage of equal data.
+
+        Raises
+        ------
+        UntaggedTableError
+            If the table is untagged.
         """
+        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
+            raise UntaggedTableError
         expected = validation_or_test_set.target
         predicted = self.predict(validation_or_test_set.features).target
 
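Given the `sk_accuracy_score` import above, `accuracy` presumably feeds the two target columns into scikit-learn; a tiny worked example of that comparison (values illustrative):

```python
from sklearn.metrics import accuracy_score as sk_accuracy_score

expected = [0, 1, 1, 0]   # validation_or_test_set.target
predicted = [0, 1, 0, 0]  # self.predict(...).target

print(sk_accuracy_score(expected, predicted))  # 0.75 -> 3 of 4 rows match
```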
20 changes: 16 additions & 4 deletions src/safeds/ml/classical/regression/_regressor.py
@@ -1,15 +1,13 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
 
 from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
 from sklearn.metrics import mean_squared_error as sk_mean_squared_error
 
+from safeds.data.tabular.containers import Column, Table, TaggedTable
 from safeds.data.tabular.exceptions import ColumnLengthMismatchError
-
-if TYPE_CHECKING:
-    from safeds.data.tabular.containers import Column, Table, TaggedTable
+from safeds.ml.exceptions import UntaggedTableError
 
 
 class Regressor(ABC):
@@ -90,7 +88,14 @@ def mean_squared_error(self, validation_or_test_set: TaggedTable) -> float:
         -------
         mean_squared_error : float
             The calculated mean squared error (the average of the distance of each individual row squared).
+
+        Raises
+        ------
+        UntaggedTableError
+            If the table is untagged.
         """
+        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
+            raise UntaggedTableError
         expected = validation_or_test_set.target
         predicted = self.predict(validation_or_test_set.features).target

@@ -111,7 +116,14 @@ def mean_absolute_error(self, validation_or_test_set: TaggedTable) -> float:
         -------
         mean_absolute_error : float
             The calculated mean absolute error (the average of the distance of each individual row).
+
+        Raises
+        ------
+        UntaggedTableError
+            If the table is untagged.
         """
+        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
+            raise UntaggedTableError
         expected = validation_or_test_set.target
         predicted = self.predict(validation_or_test_set.features).target

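The two metrics differ only in whether each row's distance is squared before averaging. A short worked example with the scikit-learn functions imported above (values illustrative):

```python
from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
from sklearn.metrics import mean_squared_error as sk_mean_squared_error

expected = [1.0, 2.0, 3.0]   # target column of the validation set
predicted = [1.0, 2.0, 5.0]  # predicted targets

print(sk_mean_absolute_error(expected, predicted))  # (0 + 0 + 2) / 3 ≈ 0.667
print(sk_mean_squared_error(expected, predicted))   # (0 + 0 + 4) / 3 ≈ 1.333
```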
2 changes: 2 additions & 0 deletions src/safeds/ml/exceptions/__init__.py
@@ -6,6 +6,7 @@
     LearningError,
     ModelNotFittedError,
     PredictionError,
+    UntaggedTableError,
 )

__all__ = [
@@ -14,4 +15,5 @@
     "LearningError",
     "ModelNotFittedError",
     "PredictionError",
+    "UntaggedTableError",
 ]
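
With the re-export in place, callers import the exception from the package rather than the private module alongside it (presumably `_exceptions`, the file changed below), exactly as the changed files above do:

```python
from safeds.ml.exceptions import UntaggedTableError  # public path used throughout this commit
```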
12 changes: 12 additions & 0 deletions src/safeds/ml/exceptions/_exceptions.py
@@ -59,3 +59,15 @@ class PredictionError(Exception):
 
     def __init__(self, reason: str):
         super().__init__(f"Error occurred while predicting: {reason}")
+
+
+class UntaggedTableError(Exception):
+    """Raised when an untagged table is used instead of a TaggedTable in a regression or classification."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            (
+                "This method needs a tagged table.\nA tagged table is a table that additionally knows which columns are"
+                " features and which are the target to predict.\nUse Table.tag_columns() to create a tagged table."
+            ),
+        )
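
The exception takes no arguments, so all of the guidance lives in the message itself. Raising and printing it yields the multi-line hint:

```python
from safeds.ml.exceptions import UntaggedTableError

try:
    raise UntaggedTableError
except UntaggedTableError as error:
    print(error)
# This method needs a tagged table.
# A tagged table is a table that additionally knows which columns are
#   features and which are the target to predict.
# Use Table.tag_columns() to create a tagged table.
```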
35 changes: 35 additions & 0 deletions tests/safeds/ml/classical/classification/test_classifier.py
@@ -20,6 +20,7 @@
     LearningError,
     ModelNotFittedError,
     PredictionError,
+    UntaggedTableError,
 )
 
 if TYPE_CHECKING:
@@ -93,6 +94,23 @@ def test_should_raise_on_invalid_data(self, classifier: Classifier, invalid_data
         with pytest.raises(LearningError):
             classifier.fit(invalid_data)
 
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, classifier: Classifier, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            classifier.fit(table)  # type: ignore[arg-type]
+
 
 @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__)
 class TestPredict:
@@ -200,3 +218,20 @@ def test_with_different_types(self) -> None:
         ).tag_columns(target_name="expected")
 
         assert DummyClassifier().accuracy(table) == 0.0
+
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            DummyClassifier().accuracy(table)  # type: ignore[arg-type]
56 changes: 54 additions & 2 deletions tests/safeds/ml/classical/regression/test_regressor.py
@@ -28,6 +28,7 @@
     LearningError,
     ModelNotFittedError,
     PredictionError,
+    UntaggedTableError,
 )
 
 if TYPE_CHECKING:
@@ -104,6 +105,23 @@ def test_should_raise_on_invalid_data(self, regressor: Regressor, invalid_data:
         with pytest.raises(LearningError):
             regressor.fit(invalid_data)
 
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, regressor: Regressor, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            regressor.fit(table)  # type: ignore[arg-type]
+
 
 @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__)
 class TestPredict:
@@ -214,6 +232,23 @@ def test_valid_data(self, predicted: list[float], expected: list[float], result:
 
         assert DummyRegressor().mean_absolute_error(table) == result
 
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            DummyRegressor().mean_absolute_error(table)  # type: ignore[arg-type]
+
 
 class TestMeanSquaredError:
     @pytest.mark.parametrize(
@@ -227,6 +262,23 @@ def test_valid_data(self, predicted: list[float], expected: list[float], result:
 
         assert DummyRegressor().mean_squared_error(table) == result
 
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            DummyRegressor().mean_squared_error(table)  # type: ignore[arg-type]
+
 
 class TestCheckMetricsPreconditions:
     @pytest.mark.parametrize(
@@ -243,7 +295,7 @@ def test_should_raise_if_validation_fails(
         expected: list[str | int],
         error: type[Exception],
     ) -> None:
-        actual_column = Column("actual", pd.Series(actual))
-        expected_column = Column("expected", pd.Series(expected))
+        actual_column: Column = Column("actual", pd.Series(actual))
+        expected_column: Column = Column("expected", pd.Series(expected))
         with pytest.raises(error):
             _check_metrics_preconditions(actual_column, expected_column)
