feat: Raise error if an untagged table is used instead of a `TaggedTable` (#234)

Closes #192.

### Summary of Changes

Added `UntaggedTableError`, which is raised when an untagged table is used instead of a `TaggedTable`.
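
In practice this turns a confusing downstream failure into an immediate, descriptive error. A minimal sketch of the distinction the new guard draws, using only containers that appear in this diff:

```python
from safeds.data.tabular.containers import Table, TaggedTable

table = Table.from_dict({"a": [1.0, 0.0], "b": [0.0, 1.0], "target": [1, 0]})
tagged = table.tag_columns(target_name="target")

print(isinstance(table, TaggedTable))   # False -> fit(), accuracy(), ... now raise UntaggedTableError
print(isinstance(tagged, TaggedTable))  # True  -> methods behave as before
```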

---------

Co-authored-by: sibre28 <86068340+sibre28@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
4 people committed Apr 28, 2023
1 parent 7f74440 commit 8eea3dd
Showing 7 changed files with 133 additions and 9 deletions.
5 changes: 5 additions & 0 deletions src/safeds/ml/classical/_util_sklearn.py
@@ -8,6 +8,7 @@
     LearningError,
     ModelNotFittedError,
     PredictionError,
+    UntaggedTableError,
 )


@@ -27,7 +28,11 @@ def fit(model: Any, tagged_table: TaggedTable) -> None:
     ------
     LearningError
         If the tagged table contains invalid values or if the training failed.
+    UntaggedTableError
+        If the table is untagged.
     """
+    if not isinstance(tagged_table, TaggedTable) and isinstance(tagged_table, Table):
+        raise UntaggedTableError
     try:
         model.fit(
             tagged_table.features._data,
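
A note on the two-part condition: it implies that `TaggedTable` subclasses `Table` (an assumption read off the guard's shape, not stated in this diff). A plain `Table` fails the first check and passes the second, so only genuinely untagged tables get the new error; any other invalid input still falls through to the existing `LearningError` path. A self-contained sketch of the pattern with stand-in classes:

```python
class Table:               # stand-in for safeds' Table, for illustration only
    pass

class TaggedTable(Table):  # assumed subclass relationship
    pass

def guard(data: object) -> None:
    # Mirrors the check in fit(): reject plain tables with a helpful error.
    if not isinstance(data, TaggedTable) and isinstance(data, Table):
        raise TypeError("untagged table")  # plays the role of UntaggedTableError

guard(TaggedTable())  # tagged tables pass the guard
guard("nonsense")     # non-tables also pass here; they fail later in model.fit
guard(Table())        # plain table -> raises immediately with guidance
```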
12 changes: 9 additions & 3 deletions src/safeds/ml/classical/classification/_classifier.py
@@ -1,12 +1,11 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
 
 from sklearn.metrics import accuracy_score as sk_accuracy_score
 
-if TYPE_CHECKING:
-    from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.ml.exceptions import UntaggedTableError
 
 
 class Classifier(ABC):
@@ -87,7 +86,14 @@ def accuracy(self, validation_or_test_set: TaggedTable) -> float:
         -------
         accuracy : float
             The calculated accuracy score, i.e. the percentage of equal data.
+
+        Raises
+        ------
+        UntaggedTableError
+            If the table is untagged.
         """
+        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
+            raise UntaggedTableError
         expected = validation_or_test_set.target
         predicted = self.predict(validation_or_test_set.features).target
 
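Given the `sk_accuracy_score` import above, `accuracy` presumably feeds the two target columns into scikit-learn; a tiny worked example of that comparison (values illustrative):

```python
from sklearn.metrics import accuracy_score as sk_accuracy_score

expected = [0, 1, 1, 0]   # validation_or_test_set.target
predicted = [0, 1, 0, 0]  # self.predict(...).target

print(sk_accuracy_score(expected, predicted))  # 0.75 -> 3 of 4 rows match
```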
20 changes: 16 additions & 4 deletions src/safeds/ml/classical/regression/_regressor.py
@@ -1,15 +1,13 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
 
 from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
 from sklearn.metrics import mean_squared_error as sk_mean_squared_error
 
+from safeds.data.tabular.containers import Column, Table, TaggedTable
 from safeds.data.tabular.exceptions import ColumnLengthMismatchError
-
-if TYPE_CHECKING:
-    from safeds.data.tabular.containers import Column, Table, TaggedTable
+from safeds.ml.exceptions import UntaggedTableError
 
 
 class Regressor(ABC):
@@ -90,7 +88,14 @@ def mean_squared_error(self, validation_or_test_set: TaggedTable) -> float:
         -------
         mean_squared_error : float
             The calculated mean squared error (the average of the distance of each individual row squared).
+
+        Raises
+        ------
+        UntaggedTableError
+            If the table is untagged.
         """
+        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
+            raise UntaggedTableError
         expected = validation_or_test_set.target
         predicted = self.predict(validation_or_test_set.features).target

@@ -111,7 +116,14 @@ def mean_absolute_error(self, validation_or_test_set: TaggedTable) -> float:
         -------
         mean_absolute_error : float
             The calculated mean absolute error (the average of the distance of each individual row).
+
+        Raises
+        ------
+        UntaggedTableError
+            If the table is untagged.
         """
+        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
+            raise UntaggedTableError
         expected = validation_or_test_set.target
         predicted = self.predict(validation_or_test_set.features).target

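The two metrics differ only in whether each row's distance is squared before averaging. A short worked example with the scikit-learn functions imported above (values illustrative):

```python
from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
from sklearn.metrics import mean_squared_error as sk_mean_squared_error

expected = [1.0, 2.0, 3.0]   # target column of the validation set
predicted = [1.0, 2.0, 5.0]  # predicted targets

print(sk_mean_absolute_error(expected, predicted))  # (0 + 0 + 2) / 3 ≈ 0.667
print(sk_mean_squared_error(expected, predicted))   # (0 + 0 + 4) / 3 ≈ 1.333
```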
2 changes: 2 additions & 0 deletions src/safeds/ml/exceptions/__init__.py
@@ -6,6 +6,7 @@
     LearningError,
     ModelNotFittedError,
     PredictionError,
+    UntaggedTableError,
 )

__all__ = [
@@ -14,4 +15,5 @@
     "LearningError",
     "ModelNotFittedError",
     "PredictionError",
+    "UntaggedTableError",
 ]
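
With the re-export in place, callers import the exception from the package rather than the private module alongside it (presumably `_exceptions`, the file changed below), exactly as the changed files above do:

```python
from safeds.ml.exceptions import UntaggedTableError  # public path used throughout this commit
```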
12 changes: 12 additions & 0 deletions src/safeds/ml/exceptions/_exceptions.py
@@ -59,3 +59,15 @@ class PredictionError(Exception):
 
     def __init__(self, reason: str):
         super().__init__(f"Error occurred while predicting: {reason}")
+
+
+class UntaggedTableError(Exception):
+    """Raised when an untagged table is used instead of a TaggedTable in a regression or classification."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            (
+                "This method needs a tagged table.\nA tagged table is a table that additionally knows which columns are"
+                " features and which are the target to predict.\nUse Table.tag_columns() to create a tagged table."
+            ),
+        )
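
The exception takes no arguments, so all of the guidance lives in the message itself. Raising and printing it yields the multi-line hint:

```python
from safeds.ml.exceptions import UntaggedTableError

try:
    raise UntaggedTableError
except UntaggedTableError as error:
    print(error)
# This method needs a tagged table.
# A tagged table is a table that additionally knows which columns are
#   features and which are the target to predict.
# Use Table.tag_columns() to create a tagged table.
```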
35 changes: 35 additions & 0 deletions tests/safeds/ml/classical/classification/test_classifier.py
@@ -20,6 +20,7 @@
     LearningError,
     ModelNotFittedError,
     PredictionError,
+    UntaggedTableError,
 )
 
 if TYPE_CHECKING:
@@ -93,6 +94,23 @@ def test_should_raise_on_invalid_data(self, classifier: Classifier, invalid_data
         with pytest.raises(LearningError):
             classifier.fit(invalid_data)
 
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, classifier: Classifier, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            classifier.fit(table)  # type: ignore[arg-type]
+
 
 @pytest.mark.parametrize("classifier", classifiers(), ids=lambda x: x.__class__.__name__)
 class TestPredict:
@@ -200,3 +218,20 @@ def test_with_different_types(self) -> None:
         ).tag_columns(target_name="expected")
 
         assert DummyClassifier().accuracy(table) == 0.0
+
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            DummyClassifier().accuracy(table)  # type: ignore[arg-type]
56 changes: 54 additions & 2 deletions tests/safeds/ml/classical/regression/test_regressor.py
@@ -28,6 +28,7 @@
     LearningError,
     ModelNotFittedError,
     PredictionError,
+    UntaggedTableError,
 )
 
 if TYPE_CHECKING:
@@ -104,6 +105,23 @@ def test_should_raise_on_invalid_data(self, regressor: Regressor, invalid_data:
         with pytest.raises(LearningError):
             regressor.fit(invalid_data)
 
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, regressor: Regressor, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            regressor.fit(table)  # type: ignore[arg-type]
+
 
 @pytest.mark.parametrize("regressor", regressors(), ids=lambda x: x.__class__.__name__)
 class TestPredict:
@@ -214,6 +232,23 @@ def test_valid_data(self, predicted: list[float], expected: list[float], result:
 
         assert DummyRegressor().mean_absolute_error(table) == result
 
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            DummyRegressor().mean_absolute_error(table)  # type: ignore[arg-type]
+
 
 class TestMeanSquaredError:
     @pytest.mark.parametrize(
@@ -227,6 +262,23 @@ def test_valid_data(self, predicted: list[float], expected: list[float], result:
 
         assert DummyRegressor().mean_squared_error(table) == result
 
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_dict(
+                {
+                    "a": [1.0, 0.0, 0.0, 0.0],
+                    "b": [0.0, 1.0, 1.0, 0.0],
+                    "c": [0.0, 0.0, 0.0, 1.0],
+                },
+            ),
+        ],
+        ids=["untagged_table"],
+    )
+    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
+        with pytest.raises(UntaggedTableError):
+            DummyRegressor().mean_squared_error(table)  # type: ignore[arg-type]
+
 
 class TestCheckMetricsPreconditions:
     @pytest.mark.parametrize(
@@ -243,7 +295,7 @@ def test_should_raise_if_validation_fails(
         expected: list[str | int],
         error: type[Exception],
     ) -> None:
-        actual_column = Column("actual", pd.Series(actual))
-        expected_column = Column("expected", pd.Series(expected))
+        actual_column: Column = Column("actual", pd.Series(actual))
+        expected_column: Column = Column("expected", pd.Series(expected))
         with pytest.raises(error):
             _check_metrics_preconditions(actual_column, expected_column)
