feat: recall and F1-score metrics for classification (#277)
Closes #187.
Closes #186.

### Summary of Changes

Added recall and F1-score methods to the `Classifier` class in `_classifier.py`.
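
For orientation, a rough usage sketch (the `report_metrics` helper, the feature data, and the `Classifier` import path are assumptions of this note, not part of the commit):

```python
from safeds.data.tabular.containers import Table, TaggedTable
from safeds.ml.classical.classification import Classifier


def report_metrics(classifier: Classifier, validation_set: TaggedTable) -> None:
    """Print the metrics added in this PR for an already fitted classifier."""
    print("precision:", classifier.precision(validation_set, positive_class=1))
    print("recall:   ", classifier.recall(validation_set, positive_class=1))
    print("F1-score: ", classifier.f1_score(validation_set, positive_class=1))


# A tagged validation set, built the same way as in the tests below.
validation_set = Table.from_dict(
    {
        "feature": [0.1, 0.2, 0.3, 0.4],
        "expected": [1, 0, 1, 2],
    },
).tag_columns(target_name="expected")
# report_metrics(some_fitted_classifier, validation_set)
```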

Co-authored-by: 128832338+patrikguempel@users.noreply.github.com

---------

Co-authored-by: Lars Reimann <mail@larsreimann.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
3 people committed May 5, 2023
1 parent 766f2ff commit 2cf93cc
Showing 2 changed files with 181 additions and 2 deletions.
85 changes: 83 additions & 2 deletions src/safeds/ml/classical/classification/_classifier.py
@@ -1,12 +1,16 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

from sklearn.metrics import accuracy_score as sk_accuracy_score

from safeds.data.tabular.containers import Table, TaggedTable
from safeds.ml.exceptions import UntaggedTableError

if TYPE_CHECKING:
    from typing import Any


class Classifier(ABC):
"""Abstract base class for all classifiers."""
@@ -100,15 +104,15 @@ def accuracy(self, validation_or_test_set: TaggedTable) -> float:

        return sk_accuracy_score(expected_values._data, predicted_values._data)

    def precision(self, validation_or_test_set: TaggedTable, positive_class: int = 1) -> float:
    def precision(self, validation_or_test_set: TaggedTable, positive_class: Any = 1) -> float:
        """
        Compute the classifier's precision on the given data.

        Parameters
        ----------
        validation_or_test_set : TaggedTable
            The validation or test set.
        positive_class : int | str
        positive_class : Any
            The class to be considered positive. All other classes are considered negative.

        Returns
@@ -136,3 +140,80 @@ def precision(self, validation_or_test_set: TaggedTable, positive_class: int = 1
        if (n_true_positives + n_false_positives) == 0:
            return 1.0
        return n_true_positives / (n_true_positives + n_false_positives)

    def recall(self, validation_or_test_set: TaggedTable, positive_class: Any = 1) -> float:
        """
        Compute the classifier's recall on the given data.

        Parameters
        ----------
        validation_or_test_set : TaggedTable
            The validation or test set.
        positive_class : Any
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        recall : float
            The calculated recall score, i.e. the ratio of correctly predicted positives to all expected positives.
            Return 1 if there are no positive expectations.
        """
        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
            raise UntaggedTableError

        expected_values = validation_or_test_set.target
        predicted_values = self.predict(validation_or_test_set.features).target

        n_true_positives = 0
        n_false_negatives = 0

        for expected_value, predicted_value in zip(expected_values, predicted_values, strict=True):
            if predicted_value == positive_class:
                if expected_value == positive_class:
                    n_true_positives += 1
            elif expected_value == positive_class:
                n_false_negatives += 1

        if (n_true_positives + n_false_negatives) == 0:
            return 1.0
        return n_true_positives / (n_true_positives + n_false_negatives)

    def f1_score(self, validation_or_test_set: TaggedTable, positive_class: Any = 1) -> float:
        """
        Compute the classifier's $F_1$-score on the given data.

        Parameters
        ----------
        validation_or_test_set : TaggedTable
            The validation or test set.
        positive_class : Any
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        f1_score : float
            The calculated $F_1$-score, i.e. the harmonic mean between precision and recall.
            Return 1 if there are no positive expectations and predictions.
        """
        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
            raise UntaggedTableError

        expected_values = validation_or_test_set.target
        predicted_values = self.predict(validation_or_test_set.features).target

        n_true_positives = 0
        n_false_negatives = 0
        n_false_positives = 0

        for expected_value, predicted_value in zip(expected_values, predicted_values, strict=True):
            if predicted_value == positive_class:
                if expected_value == positive_class:
                    n_true_positives += 1
                else:
                    n_false_positives += 1
            elif expected_value == positive_class:
                n_false_negatives += 1

        if (2 * n_true_positives + n_false_positives + n_false_negatives) == 0:
            return 1.0
        return 2 * n_true_positives / (2 * n_true_positives + n_false_positives + n_false_negatives)
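
As a quick cross-check of the counting logic above, here is a standalone re-implementation in plain Python (not part of the commit), applied to the labels used in the first recall and F1 test cases below:

```python
def recall_and_f1(expected: list, predicted: list, positive_class: object = 1) -> tuple[float, float]:
    """Mirror of the true/false positive/negative counting in the diff above."""
    n_tp = n_fp = n_fn = 0
    for expected_value, predicted_value in zip(expected, predicted, strict=True):
        if predicted_value == positive_class:
            if expected_value == positive_class:
                n_tp += 1
            else:
                n_fp += 1
        elif expected_value == positive_class:
            n_fn += 1
    recall = 1.0 if n_tp + n_fn == 0 else n_tp / (n_tp + n_fn)
    f1 = 1.0 if 2 * n_tp + n_fp + n_fn == 0 else 2 * n_tp / (2 * n_tp + n_fp + n_fn)
    return recall, f1


# Same labels as the first test cases below: one true positive, one false
# positive, and one false negative for positive_class = 1.
print(recall_and_f1(expected=[1, 0, 1, 2], predicted=[1, 1, 0, 2]))  # (0.5, 0.5)
```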
98 changes: 98 additions & 0 deletions tests/safeds/ml/classical/classification/test_classifier.py
@@ -284,3 +284,101 @@ def test_should_return_1_if_never_expected_to_be_positive(self) -> None:
    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
        with pytest.raises(UntaggedTableError):
            DummyClassifier().precision(table)  # type: ignore[arg-type]


class TestRecall:
    def test_should_compare_result(self) -> None:
        table = Table.from_dict(
            {
                "predicted": [1, 1, 0, 2],
                "expected": [1, 0, 1, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().recall(table, 1) == 0.5

    def test_should_compare_result_with_different_types(self) -> None:
        table = Table.from_dict(
            {
                "predicted": [1, "1", "0", "2"],
                "expected": [1, 0, 1, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().recall(table, 1) == 0.5

    def test_should_return_1_if_never_expected_to_be_positive(self) -> None:
        table = Table.from_dict(
            {
                "predicted": ["lol", "1", "0", "2"],
                "expected": [2, 0, 5, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().recall(table, 1) == 1.0

    @pytest.mark.parametrize(
        "table",
        [
            Table.from_dict(
                {
                    "a": [1.0, 0.0, 0.0, 0.0],
                    "b": [0.0, 1.0, 1.0, 0.0],
                    "c": [0.0, 0.0, 0.0, 1.0],
                },
            ),
        ],
        ids=["untagged_table"],
    )
    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
        with pytest.raises(UntaggedTableError):
            DummyClassifier().recall(table)  # type: ignore[arg-type]


class TestF1Score:
    def test_should_compare_result(self) -> None:
        table = Table.from_dict(
            {
                "predicted": [1, 1, 0, 2],
                "expected": [1, 0, 1, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().f1_score(table, 1) == 0.5

    def test_should_compare_result_with_different_types(self) -> None:
        table = Table.from_dict(
            {
                "predicted": [1, "1", "0", "2"],
                "expected": [1, 0, 1, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().f1_score(table, 1) == pytest.approx(0.6666667)

    def test_should_return_1_if_never_expected_or_predicted_to_be_positive(self) -> None:
        table = Table.from_dict(
            {
                "predicted": ["lol", "1", "0", "2"],
                "expected": [2, 0, 2, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().f1_score(table, 1) == 1.0

    @pytest.mark.parametrize(
        "table",
        [
            Table.from_dict(
                {
                    "a": [1.0, 0.0, 0.0, 0.0],
                    "b": [0.0, 1.0, 1.0, 0.0],
                    "c": [0.0, 0.0, 0.0, 1.0],
                },
            ),
        ],
        ids=["untagged_table"],
    )
    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
        with pytest.raises(UntaggedTableError):
            DummyClassifier().f1_score(table)  # type: ignore[arg-type]
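
One detail worth noting from the mixed-type tests above: the metrics compare labels with plain `==`, so the integer `1` and the string `"1"` count as different classes. A quick check of the expected F1 value (illustrative, not part of the commit):

```python
# Labels from the mixed-type F1 test above:
predicted = [1, "1", "0", "2"]
expected = [1, 0, 1, 2]

print(1 == "1")  # False: the string "1" never matches positive_class = 1

# Counts for positive_class = 1: one true positive (row 0), no false positives,
# one false negative (row 2), so F1 = 2 * 1 / (2 * 1 + 0 + 1) = 2 / 3.
print(2 * 1 / (2 * 1 + 0 + 1))  # 0.666..., matching pytest.approx(0.6666667)
```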
