feat: Added parameter number_of_trees to GradientBoosting (#268)
Closes #170.

### Summary of Changes

Added parameter `number_of_trees` to `GradientBoosting`
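
For illustration, a minimal usage sketch of the new parameter. The import path follows the package layout in this commit; that `GradientBoosting` is re-exported from the package `__init__` is an assumption:

```python
from safeds.ml.classical.classification import GradientBoosting

# Both hyperparameters are validated at construction time, so bad values fail fast.
model = GradientBoosting(number_of_trees=50, learning_rate=0.1)

try:
    GradientBoosting(number_of_trees=0)
except ValueError as error:
    print(error)  # The number of boosting stages to perform has to be greater than 0.
```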

---------

Co-authored-by: alex-senger <91055000+alex-senger@users.noreply.github.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
4 people committed May 5, 2023
1 parent a88eb8b commit 766f2ff
Showing 4 changed files with 62 additions and 12 deletions.
20 changes: 15 additions & 5 deletions src/safeds/ml/classical/classification/_gradient_boosting.py
```diff
@@ -13,26 +13,33 @@
 
 
 class GradientBoosting(Classifier):
-    """Gradient boosting classification.
+    """
+    Gradient boosting classification.
 
     Parameters
     ----------
+    number_of_trees: int
+        The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large
+        number usually results in better performance.
     learning_rate : float
         The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
         low, the model might underfit. If the learning rate is too high, the model might overfit.
 
     Raises
     ------
     ValueError
-        If `learning_rate` is non-positive.
+        If `learning_rate` is non-positive or the `number_of_trees` is less than or equal to 0.
     """
 
-    def __init__(self, learning_rate: float = 0.1) -> None:
+    def __init__(self, number_of_trees: int = 100, learning_rate: float = 0.1) -> None:
         # Validation
+        if number_of_trees <= 0:
+            raise ValueError("The number of boosting stages to perform has to be greater than 0.")
         if learning_rate <= 0:
             raise ValueError("The learning rate has to be greater than 0.")
 
         # Hyperparameters
+        self._number_of_trees = number_of_trees
         self._learning_rate = learning_rate
 
         # Internal state
@@ -61,10 +68,13 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
         LearningError
             If the training data contains invalid values or if the training failed.
         """
-        wrapped_classifier = sk_GradientBoostingClassifier(learning_rate=self._learning_rate)
+        wrapped_classifier = sk_GradientBoostingClassifier(
+            n_estimators=self._number_of_trees,
+            learning_rate=self._learning_rate,
+        )
         fit(wrapped_classifier, training_set)
 
-        result = GradientBoosting(learning_rate=self._learning_rate)
+        result = GradientBoosting(number_of_trees=self._number_of_trees, learning_rate=self._learning_rate)
         result._wrapped_classifier = wrapped_classifier
         result._feature_names = training_set.features.column_names
         result._target_name = training_set.target.name
```
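
As the classifier diff above shows, `number_of_trees` is simply forwarded to scikit-learn's `n_estimators`. For comparison, a sketch of the equivalent direct scikit-learn call:

```python
from sklearn.ensemble import GradientBoostingClassifier

# safe-ds's number_of_trees maps one-to-one onto scikit-learn's n_estimators;
# the defaults also agree (n_estimators=100, learning_rate=0.1).
wrapped = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)
```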
20 changes: 15 additions & 5 deletions src/safeds/ml/classical/regression/_gradient_boosting.py
```diff
@@ -13,26 +13,33 @@
 
 
 class GradientBoosting(Regressor):
-    """Gradient boosting regression.
+    """
+    Gradient boosting regression.
 
     Parameters
     ----------
+    number_of_trees: int
+        The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large
+        number usually results in better performance.
     learning_rate : float
         The larger the value, the more the model is influenced by each additional tree. If the learning rate is too
         low, the model might underfit. If the learning rate is too high, the model might overfit.
 
     Raises
     ------
     ValueError
-        If `learning_rate` is non-positive.
+        If `learning_rate` is non-positive or the `number_of_trees` is less than or equal to 0.
     """
 
-    def __init__(self, learning_rate: float = 0.1) -> None:
+    def __init__(self, number_of_trees: int = 100, learning_rate: float = 0.1) -> None:
         # Validation
+        if number_of_trees <= 0:
+            raise ValueError("The number of boosting stages to perform has to be greater than 0.")
         if learning_rate <= 0:
             raise ValueError("The learning rate has to be greater than 0.")
 
         # Hyperparameters
+        self._number_of_trees = number_of_trees
         self._learning_rate = learning_rate
 
         # Internal state
@@ -61,10 +68,13 @@ def fit(self, training_set: TaggedTable) -> GradientBoosting:
         LearningError
             If the training data contains invalid values or if the training failed.
         """
-        wrapped_regressor = sk_GradientBoostingRegressor(learning_rate=self._learning_rate)
+        wrapped_regressor = sk_GradientBoostingRegressor(
+            n_estimators=self._number_of_trees,
+            learning_rate=self._learning_rate,
+        )
         fit(wrapped_regressor, training_set)
 
-        result = GradientBoosting(learning_rate=self._learning_rate)
+        result = GradientBoosting(number_of_trees=self._number_of_trees, learning_rate=self._learning_rate)
         result._wrapped_regressor = wrapped_regressor
         result._feature_names = training_set.features.column_names
         result._target_name = training_set.target.name
```
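
Note that in both classes `fit` builds and returns a new `GradientBoosting` carrying both hyperparameters; the receiver itself stays unfitted. A short usage sketch (`training_set` stands for a `TaggedTable` prepared elsewhere):

```python
from safeds.ml.classical.regression import GradientBoosting

model = GradientBoosting(number_of_trees=200, learning_rate=0.05)
fitted = model.fit(training_set)  # training_set: a TaggedTable, assumed prepared elsewhere
assert fitted is not model  # fit() returns a new, fitted model; `model` is unchanged
```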
17 changes: 16 additions & 1 deletion tests/safeds/ml/classical/classification/test_gradient_boosting.py
```diff
@@ -20,5 +20,20 @@ def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
         assert fitted_model._wrapped_classifier.learning_rate == 2
 
     def test_should_raise_if_less_than_or_equal_to_0(self) -> None:
-        with pytest.raises(ValueError, match="has to be greater than 0"):
+        with pytest.raises(ValueError, match="The learning rate has to be greater than 0."):
             GradientBoosting(learning_rate=-1)
+
+
+class TestNumberOfTrees:
+    def test_should_be_passed_to_fitted_model(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._number_of_trees == 2
+
+    def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._wrapped_classifier is not None
+        assert fitted_model._wrapped_classifier.n_estimators == 2
+
+    def test_should_raise_if_less_than_1(self) -> None:
+        with pytest.raises(ValueError, match="The number of boosting stages to perform has to be greater than 0."):
+            GradientBoosting(number_of_trees=0)
```
17 changes: 16 additions & 1 deletion tests/safeds/ml/classical/regression/test_gradient_boosting.py
```diff
@@ -20,5 +20,20 @@ def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
         assert fitted_model._wrapped_regressor.learning_rate == 2
 
     def test_should_raise_if_less_than_or_equal_to_0(self) -> None:
-        with pytest.raises(ValueError, match="has to be greater than 0"):
+        with pytest.raises(ValueError, match="The learning rate has to be greater than 0."):
             GradientBoosting(learning_rate=-1)
+
+
+class TestNumberOfTrees:
+    def test_should_be_passed_to_fitted_model(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._number_of_trees == 2
+
+    def test_should_be_passed_to_sklearn(self, training_set: TaggedTable) -> None:
+        fitted_model = GradientBoosting(number_of_trees=2).fit(training_set)
+        assert fitted_model._wrapped_regressor is not None
+        assert fitted_model._wrapped_regressor.n_estimators == 2
+
+    def test_should_raise_if_less_than_1(self) -> None:
+        with pytest.raises(ValueError, match="The number of boosting stages to perform has to be greater than 0."):
+            GradientBoosting(number_of_trees=0)
```
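
The `training_set` fixture these tests rely on is defined elsewhere in the test modules and is not part of this diff. A minimal sketch of what such a fixture could look like; `Table.from_dict` and `tag_columns` are assumed from the safe-ds tabular API of this era and may differ between versions:

```python
import pytest

from safeds.data.tabular.containers import Table, TaggedTable


@pytest.fixture()
def training_set() -> TaggedTable:
    # Hypothetical tiny dataset: two features and a binary target.
    table = Table.from_dict({"feature_1": [1, 2, 3, 4], "feature_2": [0, 1, 0, 1], "target": [0, 1, 0, 1]})
    return table.tag_columns(target_name="target")
```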
