Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add multi-output regression support for CascadeForestRegressor #40

Merged
merged 5 commits into from
Feb 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Version 0.1.*
.. |Fix| replace:: :raw-html:`<span class="badge badge-danger">Fix</span>` :raw-latex:`{\small\sc [Fix]}`
.. |API| replace:: :raw-html:`<span class="badge badge-warning">API Change</span>` :raw-latex:`{\small\sc [API Change]}`

- |Feature| add multi-output support for :obj:`CascadeForestRegressor` (`#40 <https://github.com/LAMDA-NJU/Deep-Forest/pull/40>`__) @Alex-Medium
- |Feature| add layer-wise feature importances (`#39 <https://github.com/LAMDA-NJU/Deep-Forest/pull/39>`__) @xuyxu
- |Feature| add scikit-learn backend (`#36 <https://github.com/LAMDA-NJU/Deep-Forest/pull/36>`__) @xuyxu
- |Feature| add official support for Mac-OS (`#34 <https://github.com/LAMDA-NJU/Deep-Forest/pull/34>`__) @T-Allen-sudo
Expand Down
31 changes: 21 additions & 10 deletions deepforest/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def model_loadobj(dirname, obj_type, d=None):
obj = load(os.path.join(dirname, "{}.pkl".format(obj_type)))
return obj
elif obj_type == "layer":
from ._layer import Layer # avoid circular import
from ._layer import ClassificationCascadeLayer, RegressionCascadeLayer

if not isinstance(d, dict):
msg = "Loading layers requires the dict from `param.pkl`."
Expand All @@ -316,15 +316,26 @@ def model_loadobj(dirname, obj_type, d=None):
for layer_idx in range(n_layers):

# Build a temporary layer
layer_ = Layer(
layer_idx=layer_idx,
n_classes=d["n_outputs"],
criterion=d["criterion"],
n_estimators=d["n_estimators"],
partial_mode=d["partial_mode"],
buffer=d["buffer"],
verbose=d["verbose"],
)
if d["is_classifier"]:
layer_ = ClassificationCascadeLayer(
layer_idx=layer_idx,
n_outputs=d["n_outputs"],
criterion=d["criterion"],
n_estimators=d["n_estimators"],
partial_mode=d["partial_mode"],
buffer=d["buffer"],
verbose=d["verbose"],
)
else:
layer_ = RegressionCascadeLayer(
layer_idx=layer_idx,
n_outputs=d["n_outputs"],
criterion=d["criterion"],
n_estimators=d["n_estimators"],
partial_mode=d["partial_mode"],
buffer=d["buffer"],
verbose=d["verbose"],
)

for est_type in ("rf", "erf"):
for est_idx in range(n_estimators):
Expand Down
240 changes: 175 additions & 65 deletions deepforest/_layer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
"""Implementation of the forest-based cascade layer."""


__all__ = ["Layer"]
__all__ = [
"BaseCascadeLayer",
"ClassificationCascadeLayer",
"RegressionCascadeLayer",
]

import numpy as np
from sklearn.base import is_classifier
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin

from . import _utils
from ._estimator import Estimator
Expand Down Expand Up @@ -42,11 +48,11 @@ def _build_estimator(
return X_aug_train, estimator


class Layer(object):
class BaseCascadeLayer(BaseEstimator):
def __init__(
self,
layer_idx,
n_classes,
n_outputs,
criterion,
n_estimators=2,
n_trees=100,
Expand All @@ -58,10 +64,9 @@ def __init__(
n_jobs=None,
random_state=None,
verbose=1,
is_classifier=True,
):
self.layer_idx = layer_idx
self.n_classes = n_classes
self.n_outputs = n_outputs
self.criterion = criterion
self.n_estimators = n_estimators * 2 # internal conversion
self.n_trees = n_trees
Expand All @@ -73,7 +78,6 @@ def __init__(
self.n_jobs = n_jobs
self.random_state = random_state
self.verbose = verbose
self.is_classifier = is_classifier
# Internal container
self.estimators_ = {}

Expand Down Expand Up @@ -114,7 +118,7 @@ def _make_estimator(self, estimator_idx, estimator_name):
backend=self.backend,
n_jobs=self.n_jobs,
random_state=random_state,
is_classifier=self.is_classifier,
is_classifier=is_classifier(self),
)

return estimator
Expand All @@ -129,16 +133,87 @@ def _validate_params(self):
msg = "`n_trees` = {} should be strictly positive."
raise ValueError(msg.format(self.n_trees))

def transform(self, X):
    """
    Return the concatenated transformation results from all base
    estimators.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    X_aug : ndarray of shape (n_samples, n_outputs * n_estimators)
        The concatenated per-estimator predictions.
    """
    # The original body was a byte-for-byte duplicate of
    # ``predict_full``; delegate so the estimator-iteration logic
    # (verbose logging, partial-mode loading, slice accumulation)
    # lives in exactly one place.
    return self.predict_full(X)

def predict_full(self, X):
    """
    Return the concatenated predictions from all base estimators.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    pred : ndarray of shape (n_samples, n_outputs * n_estimators)
        Each estimator contributes a contiguous slice of
        ``n_outputs`` columns, in the iteration order of
        ``self.estimators_``.
    """
    n_samples = X.shape[0]
    width = self.n_outputs
    pred = np.zeros((n_samples, width * self.n_estimators))

    for position, (key, estimator) in enumerate(self.estimators_.items()):
        if self.verbose > 1:
            msg = "{} - Evaluating estimator = {:<5} in layer = {}"
            key = key.split("-")[-1] + "_" + str(key.split("-")[-2])
            print(msg.format(_utils.ctime(), key, self.layer_idx))

        # In partial mode only a handle is kept in memory; fetch the
        # fitted estimator from the buffer before predicting.
        if self.partial_mode:
            estimator = self.buffer.load_estimator(estimator)

        start = width * position
        pred[:, start:start + width] += estimator.predict(X)

    return pred


class ClassificationCascadeLayer(BaseCascadeLayer, ClassifierMixin):
"""Implementation of the cascade forest layer for classification."""

def __init__(
    self,
    layer_idx,
    n_outputs,
    criterion,
    n_estimators=2,
    n_trees=100,
    max_depth=None,
    min_samples_leaf=1,
    backend="custom",
    partial_mode=False,
    buffer=None,
    n_jobs=None,
    random_state=None,
    verbose=1,
):
    """Forward every argument unchanged to the shared base-layer
    constructor; the classification behaviour itself comes from the
    ``ClassifierMixin`` base class."""
    shared_params = dict(
        layer_idx=layer_idx,
        n_outputs=n_outputs,
        criterion=criterion,
        n_estimators=n_estimators,
        n_trees=n_trees,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        backend=backend,
        partial_mode=partial_mode,
        buffer=buffer,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
    )
    super().__init__(**shared_params)

def fit_transform(self, X, y, sample_weight=None):

self._validate_params()
n_samples, self.n_features = X.shape

X_aug = []
if self.is_classifier:
Alex-Medium marked this conversation as resolved.
Show resolved Hide resolved
oob_decision_function = np.zeros((n_samples, self.n_classes))
else:
oob_decision_function = np.zeros((n_samples, 1))
oob_decision_function = np.zeros((n_samples, self.n_outputs))

# A random forest and an extremely random forest will be fitted
for estimator_idx in range(self.n_estimators // 2):
Expand Down Expand Up @@ -179,66 +254,101 @@ def fit_transform(self, X, y, sample_weight=None):

# Set the OOB estimations and validation accuracy
self.oob_decision_function_ = oob_decision_function / self.n_estimators
if self.is_classifier:
y_pred = np.argmax(oob_decision_function, axis=1)
self.val_acc_ = accuracy_score(
Alex-Medium marked this conversation as resolved.
Show resolved Hide resolved
y, y_pred, sample_weight=sample_weight
)
else:
y_pred = self.oob_decision_function_
self.val_acc_ = mean_squared_error(
y, y_pred, sample_weight=sample_weight
)
y_pred = np.argmax(oob_decision_function, axis=1)
self.val_performance_ = accuracy_score(
y, y_pred, sample_weight=sample_weight
)

X_aug = np.hstack(X_aug)
return X_aug

def transform(self, X, is_classifier):
"""
Return the concatenated transformation results from all base
estimators."""
n_samples, _ = X.shape
if is_classifier:
X_aug = np.zeros((n_samples, self.n_classes * self.n_estimators))
else:
X_aug = np.zeros((n_samples, self.n_estimators))
for idx, (key, estimator) in enumerate(self.estimators_.items()):
if self.verbose > 1:
msg = "{} - Evaluating estimator = {:<5} in layer = {}"
key = key.split("-")[-1] + "_" + str(key.split("-")[-2])
print(msg.format(_utils.ctime(), key, self.layer_idx))
if self.partial_mode:
# Load the estimator from the buffer
estimator = self.buffer.load_estimator(estimator)

if is_classifier:
left, right = self.n_classes * idx, self.n_classes * (idx + 1)
else:
left, right = idx, (idx + 1)
X_aug[:, left:right] += estimator.predict(X)
class RegressionCascadeLayer(BaseCascadeLayer, RegressorMixin):
"""Implementation of the cascade forest layer for regression."""

return X_aug
def __init__(
    self,
    layer_idx,
    n_outputs,
    criterion,
    n_estimators=2,
    n_trees=100,
    max_depth=None,
    min_samples_leaf=1,
    backend="custom",
    partial_mode=False,
    buffer=None,
    n_jobs=None,
    random_state=None,
    verbose=1,
):
    """Forward every argument unchanged to the shared base-layer
    constructor; the regression behaviour itself comes from the
    ``RegressorMixin`` base class."""
    # Arguments are passed positionally in the exact order declared by
    # the base constructor.
    super().__init__(
        layer_idx,
        n_outputs,
        criterion,
        n_estimators,
        n_trees,
        max_depth,
        min_samples_leaf,
        backend,
        partial_mode,
        buffer,
        n_jobs,
        random_state,
        verbose,
    )

def predict_full(self, X, is_classifier):
"""Return the concatenated predictions from all base estimators."""
n_samples, _ = X.shape
if is_classifier:
pred = np.zeros((n_samples, self.n_classes * self.n_estimators))
else:
pred = np.zeros((n_samples, self.n_estimators))
for idx, (key, estimator) in enumerate(self.estimators_.items()):
if self.verbose > 1:
msg = "{} - Evaluating estimator = {:<5} in layer = {}"
key = key.split("-")[-1] + "_" + str(key.split("-")[-2])
print(msg.format(_utils.ctime(), key, self.layer_idx))
if self.partial_mode:
# Load the estimator from the buffer
estimator = self.buffer.load_estimator(estimator)
def fit_transform(self, X, y, sample_weight=None):

if is_classifier:
left, right = self.n_classes * idx, self.n_classes * (idx + 1)
else:
left, right = idx, (idx + 1)
pred[:, left:right] += estimator.predict(X)
self._validate_params()
n_samples, self.n_features = X.shape

return pred
X_aug = []
oob_decision_function = np.zeros((n_samples, self.n_outputs))

# A random forest and an extremely random forest will be fitted
for estimator_idx in range(self.n_estimators // 2):
X_aug_, _estimator = _build_estimator(
X,
y,
self.layer_idx,
estimator_idx,
"rf",
self._make_estimator(estimator_idx, "rf"),
oob_decision_function,
self.partial_mode,
self.buffer,
self.verbose,
sample_weight,
)
X_aug.append(X_aug_)
key = "{}-{}-{}".format(self.layer_idx, estimator_idx, "rf")
self.estimators_.update({key: _estimator})

for estimator_idx in range(self.n_estimators // 2):
X_aug_, _estimator = _build_estimator(
X,
y,
self.layer_idx,
estimator_idx,
"erf",
self._make_estimator(estimator_idx, "erf"),
oob_decision_function,
self.partial_mode,
self.buffer,
self.verbose,
sample_weight,
)
X_aug.append(X_aug_)
key = "{}-{}-{}".format(self.layer_idx, estimator_idx, "erf")
self.estimators_.update({key: _estimator})

# Set the OOB estimations and validation mean squared error
self.oob_decision_function_ = oob_decision_function / self.n_estimators
y_pred = self.oob_decision_function_
self.val_performance_ = mean_squared_error(
y, y_pred, sample_weight=sample_weight
)

X_aug = np.hstack(X_aug)
return X_aug
Loading