feat: set learning rate of Gradient Boosting models (#253)
Closes #168.

### Summary of Changes

Add `learning_rate` parameter to `GradientBoosting` classifier and
regressor.
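
For reference, a minimal usage sketch of the new parameter (the table contents and column names are illustrative; the API calls follow the tests added in this commit):

```python
from safeds.data.tabular.containers import Table
from safeds.ml.classical.regression import GradientBoosting

# Illustrative training data: one feature column, one target column.
training_set = Table.from_dict({"feature": [1, 2, 3, 4], "target": [2, 4, 6, 8]})
tagged_table = training_set.tag_columns("target")

# learning_rate defaults to 0.1 and must be strictly positive;
# a non-positive value raises a ValueError at construction time.
regressor = GradientBoosting(learning_rate=0.05).fit(tagged_table)
```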

---------

Co-authored-by: Lars Reimann <mail@larsreimann.com>
alex-senger and lars-reimann committed on Apr 28, 2023
1 parent 8eea3dd commit 9ffaf55
Showing 4 changed files with 72 additions and 8 deletions.
@@ -13,12 +13,27 @@
 
 
 class GradientBoosting(Classifier):
-    """Gradient boosting classification."""
+    """Gradient boosting classification.
+
+    Parameters
+    ----------
+    learning_rate : float
+        The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
+        low, the model might underfit. If the learning rate is too high, the model might overfit.
+
+    Raises
+    ------
+    ValueError
+        If `learning_rate` is non-positive.
+    """
 
-    def __init__(self) -> None:
+    def __init__(self, learning_rate: float = 0.1) -> None:
         self._wrapped_classifier: sk_GradientBoostingClassifier | None = None
         self._feature_names: list[str] | None = None
         self._target_name: str | None = None
+        if learning_rate <= 0:
+            raise ValueError("learning_rate must be positive.")
+        self._learning_rate = learning_rate
 
     def fit(self, training_set: TaggedTable) -> GradientBoosting:
         """

@@ -41,10 +56,10 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
         LearningError
             If the training data contains invalid values or if the training failed.
         """
-        wrapped_classifier = sk_GradientBoostingClassifier()
+        wrapped_classifier = sk_GradientBoostingClassifier(learning_rate=self._learning_rate)
         fit(wrapped_classifier, training_set)
 
-        result = GradientBoosting()
+        result = GradientBoosting(learning_rate=self._learning_rate)
         result._wrapped_classifier = wrapped_classifier
         result._feature_names = training_set.features.column_names
         result._target_name = training_set.target.name
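
Note the pattern in the `fit` hunk above: the model is not mutated in place; `fit` builds a fresh `GradientBoosting`, which is why the learning rate has to be forwarded explicitly to the `result` copy. A small sketch of the observable behavior (reusing `tagged_table` from the earlier example; the attribute name follows the diff above):

```python
untrained = GradientBoosting(learning_rate=0.5)
trained = untrained.fit(tagged_table)

assert trained is not untrained       # fit returns a new, fitted instance
assert trained._learning_rate == 0.5  # the constructor argument is carried over
```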
@@ -13,12 +13,27 @@
 
 
 class GradientBoosting(Regressor):
-    """Gradient boosting regression."""
+    """Gradient boosting regression.
+
+    Parameters
+    ----------
+    learning_rate : float
+        The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
+        low, the model might underfit. If the learning rate is too high, the model might overfit.
+
+    Raises
+    ------
+    ValueError
+        If `learning_rate` is non-positive.
+    """
 
-    def __init__(self) -> None:
+    def __init__(self, learning_rate: float = 0.1) -> None:
         self._wrapped_regressor: sk_GradientBoostingRegressor | None = None
         self._feature_names: list[str] | None = None
         self._target_name: str | None = None
+        if learning_rate <= 0:
+            raise ValueError("learning_rate must be positive.")
+        self._learning_rate = learning_rate
 
     def fit(self, training_set: TaggedTable) -> GradientBoosting:
         """

@@ -41,10 +56,10 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
         LearningError
             If the training data contains invalid values or if the training failed.
         """
-        wrapped_regressor = sk_GradientBoostingRegressor()
+        wrapped_regressor = sk_GradientBoostingRegressor(learning_rate=self._learning_rate)
         fit(wrapped_regressor, training_set)
 
-        result = GradientBoosting()
+        result = GradientBoosting(learning_rate=self._learning_rate)
         result._wrapped_regressor = wrapped_regressor
         result._feature_names = training_set.features.column_names
         result._target_name = training_set.target.name
@@ -0,0 +1,17 @@
+import pytest
+from safeds.data.tabular.containers import Table
+from safeds.ml.classical.classification import GradientBoosting
+
+
+def test_should_throw_value_error_if_learning_rate_is_non_positive() -> None:
+    with pytest.raises(ValueError, match="learning_rate must be positive."):
+        GradientBoosting(learning_rate=-1)
+
+
+def test_should_pass_learning_rate_to_sklearn() -> None:
+    training_set = Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]})
+    tagged_table = training_set.tag_columns("col1")
+
+    classifier = GradientBoosting(learning_rate=2).fit(tagged_table)
+    assert classifier._wrapped_classifier is not None
+    assert classifier._wrapped_classifier.learning_rate == classifier._learning_rate
@@ -0,0 +1,17 @@
+import pytest
+from safeds.data.tabular.containers import Table
+from safeds.ml.classical.regression import GradientBoosting
+
+
+def test_should_throw_value_error_if_learning_rate_is_non_positive() -> None:
+    with pytest.raises(ValueError, match="learning_rate must be positive."):
+        GradientBoosting(learning_rate=-1)
+
+
+def test_should_pass_learning_rate_to_sklearn() -> None:
+    training_set = Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]})
+    tagged_table = training_set.tag_columns("col1")
+
+    regressor = GradientBoosting(learning_rate=2).fit(tagged_table)
+    assert regressor._wrapped_regressor is not None
+    assert regressor._wrapped_regressor.learning_rate == regressor._learning_rate
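
Both test files pin down the same contract: after `fit`, the wrapped scikit-learn estimator must have been constructed with the model's learning rate. For orientation, the default of 0.1 chosen here matches scikit-learn's own default, so leaving the parameter out is equivalent to the sketch below (illustrative, regression case):

```python
from sklearn.ensemble import GradientBoostingRegressor

# What GradientBoosting.fit() builds internally when learning_rate is left at its default.
wrapped_regressor = GradientBoostingRegressor(learning_rate=0.1)
```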
