feat: recall and F1-score metrics for classification (#277)
Closes #187.
Closes #186.

### Summary of Changes

Added recall and F1-score methods to the `Classifier` class in `_classifier.py`.
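
For orientation, a rough usage sketch (the `report_metrics` helper, the feature data, and the `Classifier` import path are assumptions of this note, not part of the commit):

```python
from safeds.data.tabular.containers import Table, TaggedTable
from safeds.ml.classical.classification import Classifier


def report_metrics(classifier: Classifier, validation_set: TaggedTable) -> None:
    """Print the metrics added in this PR for an already fitted classifier."""
    print("precision:", classifier.precision(validation_set, positive_class=1))
    print("recall:   ", classifier.recall(validation_set, positive_class=1))
    print("F1-score: ", classifier.f1_score(validation_set, positive_class=1))


# A tagged validation set, built the same way as in the tests below.
validation_set = Table.from_dict(
    {
        "feature": [0.1, 0.2, 0.3, 0.4],
        "expected": [1, 0, 1, 2],
    },
).tag_columns(target_name="expected")
# report_metrics(some_fitted_classifier, validation_set)
```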

Co-authored-by: 128832338+patrikguempel@users.noreply.github.com

---------

Co-authored-by: Lars Reimann <mail@larsreimann.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
3 people committed May 5, 2023
1 parent 766f2ff commit 2cf93cc
Showing 2 changed files with 181 additions and 2 deletions.
85 changes: 83 additions & 2 deletions src/safeds/ml/classical/classification/_classifier.py
@@ -1,12 +1,16 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

from sklearn.metrics import accuracy_score as sk_accuracy_score

from safeds.data.tabular.containers import Table, TaggedTable
from safeds.ml.exceptions import UntaggedTableError

if TYPE_CHECKING:
    from typing import Any


class Classifier(ABC):
"""Abstract base class for all classifiers."""
@@ -100,15 +104,15 @@ def accuracy(self, validation_or_test_set: TaggedTable) -> float:

        return sk_accuracy_score(expected_values._data, predicted_values._data)

    def precision(self, validation_or_test_set: TaggedTable, positive_class: int = 1) -> float:
    def precision(self, validation_or_test_set: TaggedTable, positive_class: Any = 1) -> float:
        """
        Compute the classifier's precision on the given data.

        Parameters
        ----------
        validation_or_test_set : TaggedTable
            The validation or test set.
        positive_class : int | str
        positive_class : Any
            The class to be considered positive. All other classes are considered negative.

        Returns
@@ -136,3 +140,80 @@ def precision(self, validation_or_test_set: TaggedTable, positive_class: int = 1
        if (n_true_positives + n_false_positives) == 0:
            return 1.0
        return n_true_positives / (n_true_positives + n_false_positives)

    def recall(self, validation_or_test_set: TaggedTable, positive_class: Any = 1) -> float:
        """
        Compute the classifier's recall on the given data.

        Parameters
        ----------
        validation_or_test_set : TaggedTable
            The validation or test set.
        positive_class : Any
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        recall : float
            The calculated recall score, i.e. the ratio of correctly predicted positives to all expected positives.
            Return 1 if there are no positive expectations.
        """
        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
            raise UntaggedTableError

        expected_values = validation_or_test_set.target
        predicted_values = self.predict(validation_or_test_set.features).target

        n_true_positives = 0
        n_false_negatives = 0

        for expected_value, predicted_value in zip(expected_values, predicted_values, strict=True):
            if predicted_value == positive_class:
                if expected_value == positive_class:
                    n_true_positives += 1
            elif expected_value == positive_class:
                n_false_negatives += 1

        if (n_true_positives + n_false_negatives) == 0:
            return 1.0
        return n_true_positives / (n_true_positives + n_false_negatives)

    def f1_score(self, validation_or_test_set: TaggedTable, positive_class: Any = 1) -> float:
        """
        Compute the classifier's $F_1$-score on the given data.

        Parameters
        ----------
        validation_or_test_set : TaggedTable
            The validation or test set.
        positive_class : Any
            The class to be considered positive. All other classes are considered negative.

        Returns
        -------
        f1_score : float
            The calculated $F_1$-score, i.e. the harmonic mean between precision and recall.
            Return 1 if there are no positive expectations and predictions.
        """
        if not isinstance(validation_or_test_set, TaggedTable) and isinstance(validation_or_test_set, Table):
            raise UntaggedTableError

        expected_values = validation_or_test_set.target
        predicted_values = self.predict(validation_or_test_set.features).target

        n_true_positives = 0
        n_false_negatives = 0
        n_false_positives = 0

        for expected_value, predicted_value in zip(expected_values, predicted_values, strict=True):
            if predicted_value == positive_class:
                if expected_value == positive_class:
                    n_true_positives += 1
                else:
                    n_false_positives += 1
            elif expected_value == positive_class:
                n_false_negatives += 1

        if (2 * n_true_positives + n_false_positives + n_false_negatives) == 0:
            return 1.0
        return 2 * n_true_positives / (2 * n_true_positives + n_false_positives + n_false_negatives)
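
As a quick cross-check of the counting logic above, here is a standalone re-implementation in plain Python (not part of the commit), applied to the labels used in the first recall and F1 test cases below:

```python
def recall_and_f1(expected: list, predicted: list, positive_class: object = 1) -> tuple[float, float]:
    """Mirror of the true/false positive/negative counting in the diff above."""
    n_tp = n_fp = n_fn = 0
    for expected_value, predicted_value in zip(expected, predicted, strict=True):
        if predicted_value == positive_class:
            if expected_value == positive_class:
                n_tp += 1
            else:
                n_fp += 1
        elif expected_value == positive_class:
            n_fn += 1
    recall = 1.0 if n_tp + n_fn == 0 else n_tp / (n_tp + n_fn)
    f1 = 1.0 if 2 * n_tp + n_fp + n_fn == 0 else 2 * n_tp / (2 * n_tp + n_fp + n_fn)
    return recall, f1


# Same labels as the first test cases below: one true positive, one false
# positive, and one false negative for positive_class = 1.
print(recall_and_f1(expected=[1, 0, 1, 2], predicted=[1, 1, 0, 2]))  # (0.5, 0.5)
```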
98 changes: 98 additions & 0 deletions tests/safeds/ml/classical/classification/test_classifier.py
@@ -284,3 +284,101 @@ def test_should_return_1_if_never_expected_to_be_positive(self) -> None:
    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
        with pytest.raises(UntaggedTableError):
            DummyClassifier().precision(table)  # type: ignore[arg-type]


class TestRecall:
    def test_should_compare_result(self) -> None:
        table = Table.from_dict(
            {
                "predicted": [1, 1, 0, 2],
                "expected": [1, 0, 1, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().recall(table, 1) == 0.5

    def test_should_compare_result_with_different_types(self) -> None:
        table = Table.from_dict(
            {
                "predicted": [1, "1", "0", "2"],
                "expected": [1, 0, 1, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().recall(table, 1) == 0.5

    def test_should_return_1_if_never_expected_to_be_positive(self) -> None:
        table = Table.from_dict(
            {
                "predicted": ["lol", "1", "0", "2"],
                "expected": [2, 0, 5, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().recall(table, 1) == 1.0

    @pytest.mark.parametrize(
        "table",
        [
            Table.from_dict(
                {
                    "a": [1.0, 0.0, 0.0, 0.0],
                    "b": [0.0, 1.0, 1.0, 0.0],
                    "c": [0.0, 0.0, 0.0, 1.0],
                },
            ),
        ],
        ids=["untagged_table"],
    )
    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
        with pytest.raises(UntaggedTableError):
            DummyClassifier().recall(table)  # type: ignore[arg-type]


class TestF1Score:
    def test_should_compare_result(self) -> None:
        table = Table.from_dict(
            {
                "predicted": [1, 1, 0, 2],
                "expected": [1, 0, 1, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().f1_score(table, 1) == 0.5

    def test_should_compare_result_with_different_types(self) -> None:
        table = Table.from_dict(
            {
                "predicted": [1, "1", "0", "2"],
                "expected": [1, 0, 1, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().f1_score(table, 1) == pytest.approx(0.6666667)

    def test_should_return_1_if_never_expected_or_predicted_to_be_positive(self) -> None:
        table = Table.from_dict(
            {
                "predicted": ["lol", "1", "0", "2"],
                "expected": [2, 0, 2, 2],
            },
        ).tag_columns(target_name="expected")

        assert DummyClassifier().f1_score(table, 1) == 1.0

    @pytest.mark.parametrize(
        "table",
        [
            Table.from_dict(
                {
                    "a": [1.0, 0.0, 0.0, 0.0],
                    "b": [0.0, 1.0, 1.0, 0.0],
                    "c": [0.0, 0.0, 0.0, 1.0],
                },
            ),
        ],
        ids=["untagged_table"],
    )
    def test_should_raise_if_table_is_not_tagged(self, table: Table) -> None:
        with pytest.raises(UntaggedTableError):
            DummyClassifier().f1_score(table)  # type: ignore[arg-type]
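
One detail worth noting from the mixed-type tests above: the metrics compare labels with plain `==`, so the integer `1` and the string `"1"` count as different classes. A quick check of the expected F1 value (illustrative, not part of the commit):

```python
# Labels from the mixed-type F1 test above:
predicted = [1, "1", "0", "2"]
expected = [1, 0, 1, 2]

print(1 == "1")  # False: the string "1" never matches positive_class = 1

# Counts for positive_class = 1: one true positive (row 0), no false positives,
# one false negative (row 2), so F1 = 2 * 1 / (2 * 1 + 0 + 1) = 2 / 3.
print(2 * 1 / (2 * 1 + 0 + 1))  # 0.666..., matching pytest.approx(0.6666667)
```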
