feat: Added parameter number_of_trees to GradientBoosting (#268)
Closes #170.

### Summary of Changes

Added parameter `number_of_trees` to `GradientBoosting`
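
For illustration, a minimal usage sketch of the new parameter. The import path follows the package layout in this commit; that `GradientBoosting` is re-exported from the package `__init__` is an assumption:

```python
from safeds.ml.classical.classification import GradientBoosting

# Both hyperparameters are validated at construction time, so bad values fail fast.
model = GradientBoosting(number_of_trees=50, learning_rate=0.1)

try:
    GradientBoosting(number_of_trees=0)
except ValueError as error:
    print(error)  # The number of boosting stages to perform has to be greater than 0.
```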

---------

Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
4 people committed May 5, 2023
1 parent a88eb8b commit 766f2ff
Showing 4 changed files with 62 additions and 12 deletions.
20 changes: 15 additions & 5 deletions src/safeds/ml/classical/classification/_gradient_boosting.py
```diff
@@ -13,26 +13,33 @@
 
 
 class GradientBoosting(Classifier):
-    """Gradient boosting classification.
+    """
+    Gradient boosting classification.
 
     Parameters
     ----------
+    number_of_trees: int
+        The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large
+        number usually results in better performance.
     learning_rate : float
         The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
         low, the model might underfit. If the learning rate is too high, the model might overfit.
 
     Raises
     ------
     ValueError
-        If `learning_rate` is non-positive.
+        If `learning_rate` is non-positive or the `number_of_trees` is less than or equal to 0.
     """
 
-    def __init__(self, learning_rate: float = 0.1) -> None:
+    def __init__(self, number_of_trees: int = 100, learning_rate: float = 0.1) -> None:
         # Validation
+        if number_of_trees <= 0:
+            raise ValueError("The number of boosting stages to perform has to be greater than 0.")
         if learning_rate <= 0:
             raise ValueError("The learning rate has to be greater than 0.")
 
         # Hyperparameters
+        self._number_of_trees = number_of_trees
         self._learning_rate = learning_rate
 
         # Internal state
@@ -61,10 +68,13 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
         LearningError
             If the training data contains invalid values or if the training failed.
         """
-        wrapped_classifier = sk_GradientBoostingClassifier(learning_rate=self._learning_rate)
+        wrapped_classifier = sk_GradientBoostingClassifier(
+            n_estimators=self._number_of_trees,
+            learning_rate=self._learning_rate,
+        )
         fit(wrapped_classifier, training_set)
 
-        result = GradientBoosting(learning_rate=self._learning_rate)
+        result = GradientBoosting(number_of_trees=self._number_of_trees, learning_rate=self._learning_rate)
         result._wrapped_classifier = wrapped_classifier
         result._feature_names = training_set.features.column_names
         result._target_name = training_set.target.name
```
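
As the classifier diff above shows, `number_of_trees` is simply forwarded to scikit-learn's `n_estimators`. For comparison, a sketch of the equivalent direct scikit-learn call:

```python
from sklearn.ensemble import GradientBoostingClassifier

# safe-ds's number_of_trees maps one-to-one onto scikit-learn's n_estimators;
# the defaults also agree (n_estimators=100, learning_rate=0.1).
wrapped = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
```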
20 changes: 15 additions & 5 deletions src/safeds/ml/classical/regression/_gradient_boosting.py
```diff
@@ -13,26 +13,33 @@
 
 
 class GradientBoosting(Regressor):
-    """Gradient boosting regression.
+    """
+    Gradient boosting regression.
 
     Parameters
     ----------
+    number_of_trees: int
+        The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large
+        number usually results in better performance.
     learning_rate : float
         The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
         low, the model might underfit. If the learning rate is too high, the model might overfit.
 
     Raises
     ------
     ValueError
-        If `learning_rate` is non-positive.
+        If `learning_rate` is non-positive or the `number_of_trees` is less than or equal to 0.
     """
 
-    def __init__(self, learning_rate: float = 0.1) -> None:
+    def __init__(self, number_of_trees: int = 100, learning_rate: float = 0.1) -> None:
         # Validation
+        if number_of_trees <= 0:
+            raise ValueError("The number of boosting stages to perform has to be greater than 0.")
         if learning_rate <= 0:
             raise ValueError("The learning rate has to be greater than 0.")
 
         # Hyperparameters
+        self._number_of_trees = number_of_trees
         self._learning_rate = learning_rate
 
         # Internal state
@@ -61,10 +68,13 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
         LearningError
             If the training data contains invalid values or if the training failed.
         """
-        wrapped_regressor = sk_GradientBoostingRegressor(learning_rate=self._learning_rate)
+        wrapped_regressor = sk_GradientBoostingRegressor(
+            n_estimators=self._number_of_trees,
+            learning_rate=self._learning_rate,
+        )
         fit(wrapped_regressor, training_set)
 
-        result = GradientBoosting(learning_rate=self._learning_rate)
+        result = GradientBoosting(number_of_trees=self._number_of_trees, learning_rate=self._learning_rate)
         result._wrapped_regressor = wrapped_regressor
         result._feature_names = training_set.features.column_names
         result._target_name = training_set.target.name
```
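
Note that in both classes `fit` builds and returns a new `GradientBoosting` carrying both hyperparameters; the receiver itself stays unfitted. A short usage sketch (`training_set` stands for a `TaggedTable` prepared elsewhere):

```python
from safeds.ml.classical.regression import GradientBoosting

model = GradientBoosting(number_of_trees=200, learning_rate=0.05)
fitted = model.fit(training_set)  # training_set: a TaggedTable, assumed prepared elsewhere
assert fitted is not model  # fit() returns a new, fitted model; `model` is unchanged
```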
17 changes: 16 additions & 1 deletion tests/safeds/ml/classical/classification/test_gradient_boosting.py
```diff
@@ -20,5 +20,20 @@ def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
         assert fitted_model._wrapped_classifier.learning_rate == 2
 
     def test_should_raise_if_less_than_or_equal_to_0(self) -> None:
-        with pytest.raises(ValueError, match="has to be greater than 0"):
+        with pytest.raises(ValueError, match="The learning rate has to be greater than 0."):
             GradientBoosting(learning_rate=-1)
+
+
+class TestNumberOfTrees:
+    def test_should_be_passed_to_fitted_model(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._number_of_trees == 2
+
+    def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._wrapped_classifier is not None
+        assert fitted_model._wrapped_classifier.n_estimators == 2
+
+    def test_should_raise_if_less_than_1(self) -> None:
+        with pytest.raises(ValueError, match="The number of boosting stages to perform has to be greater than 0."):
+            GradientBoosting(number_of_trees=0)
```
17 changes: 16 additions & 1 deletion tests/safeds/ml/classical/regression/test_gradient_boosting.py
```diff
@@ -20,5 +20,20 @@ def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
         assert fitted_model._wrapped_regressor.learning_rate == 2
 
     def test_should_raise_if_less_than_or_equal_to_0(self) -> None:
-        with pytest.raises(ValueError, match="has to be greater than 0"):
+        with pytest.raises(ValueError, match="The learning rate has to be greater than 0."):
             GradientBoosting(learning_rate=-1)
+
+
+class TestNumberOfTrees:
+    def test_should_be_passed_to_fitted_model(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._number_of_trees == 2
+
+    def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._wrapped_regressor is not None
+        assert fitted_model._wrapped_regressor.n_estimators == 2
+
+    def test_should_raise_if_less_than_1(self) -> None:
+        with pytest.raises(ValueError, match="The number of boosting stages to perform has to be greater than 0."):
+            GradientBoosting(number_of_trees=0)
```
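
The `training_set` fixture these tests rely on is defined elsewhere in the test modules and is not part of this diff. A minimal sketch of what such a fixture could look like; `Table.from_dict` and `tag_columns` are assumed from the safe-ds tabular API of this era and may differ between versions:

```python
import pytest

from safeds.data.tabular.containers import Table, TaggedTable


@pytest.fixture()
def training_set() -> TaggedTable:
    # Hypothetical tiny dataset: two features and a binary target.
    table = Table.from_dict({"feature_1": [1, 2, 3, 4], "feature_2": [0, 1, 0, 1], "target": [0, 1, 0, 1]})
    return table.tag_columns(target_name="target")
```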
