feat: Regressor.summarize_metrics and Classifier.summarize_metrics (#729)

Closes #713

### Summary of Changes

Add `Regressor.summarize_metrics` and `Classifier.summarize_metrics` to quickly compute several suitable metrics in a single call.
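
Below is a minimal usage sketch (not part of this commit) of how the new methods could be called. The `DecisionTreeClassifier`/`DecisionTreeRegressor` imports and the toy column names are assumptions for illustration; only `summarize_metrics`, `Table`, and `to_tabular_dataset` are taken from this diff.

```python
# Hypothetical usage sketch; model classes and column names are assumptions.
from safeds.data.tabular.containers import Table
from safeds.ml.classical.classification import DecisionTreeClassifier
from safeds.ml.classical.regression import DecisionTreeRegressor

# Classification: one Table with accuracy, precision, recall and f1_score.
training_set = Table(
    {"feature": [1, 2, 3, 4], "label": [0, 0, 1, 1]},
).to_tabular_dataset(target_name="label")
test_set = Table(
    {"feature": [1, 4], "label": [0, 1]},
).to_tabular_dataset(target_name="label")

fitted_classifier = DecisionTreeClassifier().fit(training_set)
print(fitted_classifier.summarize_metrics(test_set, positive_class=1))

# Regression: one Table with mean_absolute_error and mean_squared_error.
regression_training = Table(
    {"feature": [1, 2, 3, 4], "target": [2.0, 4.0, 6.0, 8.0]},
).to_tabular_dataset(target_name="target")
regression_test = Table(
    {"feature": [2, 3], "target": [4.0, 6.0]},
).to_tabular_dataset(target_name="target")

fitted_regressor = DecisionTreeRegressor().fit(regression_training)
print(fitted_regressor.summarize_metrics(regression_test))
```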

---------

Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
lars-reimann and megalinter-bot committed May 5, 2024
1 parent c89e0bf commit 1cc14b1
Showing 4 changed files with 174 additions and 14 deletions.
38 changes: 37 additions & 1 deletion src/safeds/ml/classical/classification/_classifier.py
@@ -92,7 +92,43 @@ def _get_sklearn_classifier(self) -> ClassifierMixin:
The sklearn Classifier.
"""

# noinspection PyProtectedMember
# ------------------------------------------------------------------------------------------------------------------
# Metrics
# ------------------------------------------------------------------------------------------------------------------

def summarize_metrics(self, validation_or_test_set: TabularDataset, positive_class: Any) -> Table:
"""
Summarize the classifier's metrics on the given data.
Parameters
----------
validation_or_test_set:
The validation or test set.
positive_class:
The class to be considered positive. All other classes are considered negative.
Returns
-------
metrics:
A table containing the classifier's metrics.
Raises
------
TypeError
If a table is passed instead of a tabular dataset.
"""
accuracy = self.accuracy(validation_or_test_set)
precision = self.precision(validation_or_test_set, positive_class)
recall = self.recall(validation_or_test_set, positive_class)
f1_score = self.f1_score(validation_or_test_set, positive_class)

return Table(
{
"metric": ["accuracy", "precision", "recall", "f1_score"],
"value": [accuracy, precision, recall, f1_score],
},
)

def accuracy(self, validation_or_test_set: TabularDataset) -> float:
"""
Compute the accuracy of the classifier on the given data.
58 changes: 45 additions & 13 deletions src/safeds/ml/classical/regression/_regressor.py
@@ -90,10 +90,13 @@ def _get_sklearn_regressor(self) -> RegressorMixin:
The sklearn Regressor.
"""

# noinspection PyProtectedMember
def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
# ------------------------------------------------------------------------------------------------------------------
# Metrics
# ------------------------------------------------------------------------------------------------------------------

def summarize_metrics(self, validation_or_test_set: TabularDataset) -> Table:
"""
Compute the mean squared error (MSE) on the given data.
Summarize the regressor's metrics on the given data.
Parameters
----------
@@ -102,28 +105,57 @@ def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
Returns
-------
mean_squared_error:
The calculated mean squared error (the average of the distance of each individual row squared).
metrics:
A table containing the regressor's metrics.
Raises
------
TypeError
If a table is passed instead of a tabular dataset.
"""
from sklearn.metrics import mean_squared_error as sk_mean_squared_error
mean_absolute_error = self.mean_absolute_error(validation_or_test_set)
mean_squared_error = self.mean_squared_error(validation_or_test_set)

return Table(
{
"metric": ["mean_absolute_error", "mean_squared_error"],
"value": [mean_absolute_error, mean_squared_error],
},
)

def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float:
"""
Compute the mean absolute error (MAE) of the regressor on the given data.
Parameters
----------
validation_or_test_set:
The validation or test set.
Returns
-------
mean_absolute_error:
The calculated mean absolute error (the average of the distance of each individual row).
Raises
------
TypeError
If a table is passed instead of a tabular dataset.
"""
from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error

if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table):
raise PlainTableError
expected = validation_or_test_set.target
predicted = self.predict(validation_or_test_set.features).target

_check_metrics_preconditions(predicted, expected)
return sk_mean_squared_error(expected._data, predicted._data)
return sk_mean_absolute_error(expected._data, predicted._data)

# noinspection PyProtectedMember
def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float:
def mean_squared_error(self, validation_or_test_set: TabularDataset) -> float:
"""
Compute the mean absolute error (MAE) of the regressor on the given data.
Compute the mean squared error (MSE) on the given data.
Parameters
----------
@@ -132,23 +164,23 @@ def mean_absolute_error(self, validation_or_test_set: TabularDataset) -> float:
Returns
-------
mean_absolute_error:
The calculated mean absolute error (the average of the distance of each individual row).
mean_squared_error:
The calculated mean squared error (the average of the distance of each individual row squared).
Raises
------
TypeError
If a table is passed instead of a tabular dataset.
"""
from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
from sklearn.metrics import mean_squared_error as sk_mean_squared_error

if not isinstance(validation_or_test_set, TabularDataset) and isinstance(validation_or_test_set, Table):
raise PlainTableError
expected = validation_or_test_set.target
predicted = self.predict(validation_or_test_set.features).target

_check_metrics_preconditions(predicted, expected)
return sk_mean_absolute_error(expected._data, predicted._data)
return sk_mean_squared_error(expected._data, predicted._data)


# noinspection PyProtectedMember
46 changes: 46 additions & 0 deletions tests/safeds/ml/classical/classification/test_classifier.py
@@ -336,6 +336,52 @@ def _get_sklearn_classifier(self) -> ClassifierMixin:
pass


class TestSummarizeMetrics:
@pytest.mark.parametrize(
("predicted", "expected", "result"),
[
(
[1, 2],
[1, 2],
Table(
{
"metric": ["accuracy", "precision", "recall", "f1_score"],
"value": [1.0, 1.0, 1.0, 1.0],
},
),
),
],
)
def test_valid_data(self, predicted: list[float], expected: list[float], result: Table) -> None:
table = Table(
{
"predicted": predicted,
"expected": expected,
},
).to_tabular_dataset(
target_name="expected",
)

assert DummyClassifier().summarize_metrics(table, 1) == result

@pytest.mark.parametrize(
"table",
[
Table(
{
"a": [1.0, 0.0, 0.0, 0.0],
"b": [0.0, 1.0, 1.0, 0.0],
"c": [0.0, 0.0, 0.0, 1.0],
},
),
],
ids=["table"],
)
def test_should_raise_if_given_normal_table(self, table: Table) -> None:
with pytest.raises(PlainTableError):
DummyClassifier().summarize_metrics(table, 1) # type: ignore[arg-type]


class TestAccuracy:
def test_with_same_type(self) -> None:
table = Table(
46 changes: 46 additions & 0 deletions tests/safeds/ml/classical/regression/test_regressor.py
@@ -343,6 +343,52 @@ def _get_sklearn_regressor(self) -> RegressorMixin:
pass


class TestSummarizeMetrics:
@pytest.mark.parametrize(
("predicted", "expected", "result"),
[
(
[1, 2],
[1, 2],
Table(
{
"metric": ["mean_absolute_error", "mean_squared_error"],
"value": [0.0, 0.0],
},
),
),
],
)
def test_valid_data(self, predicted: list[float], expected: list[float], result: Table) -> None:
table = Table(
{
"predicted": predicted,
"expected": expected,
},
).to_tabular_dataset(
target_name="expected",
)

assert DummyRegressor().summarize_metrics(table) == result

@pytest.mark.parametrize(
"table",
[
Table(
{
"a": [1.0, 0.0, 0.0, 0.0],
"b": [0.0, 1.0, 1.0, 0.0],
"c": [0.0, 0.0, 0.0, 1.0],
},
),
],
ids=["table"],
)
def test_should_raise_if_given_normal_table(self, table: Table) -> None:
with pytest.raises(PlainTableError):
DummyRegressor().summarize_metrics(table) # type: ignore[arg-type]


class TestMeanAbsoluteError:
@pytest.mark.parametrize(
("predicted", "expected", "result"),
