Commit

Merge d33a238 into 815b0e2

8bit-pixies committed Jul 26, 2019
2 parents 815b0e2 + d33a238 commit d9eec3e
Showing 5 changed files with 239 additions and 0 deletions.
1 change: 1 addition & 0 deletions optional-requirements.txt
@@ -1,3 +1,4 @@
xgboost==0.6a2
scikit-mdr==0.4.4
skrebate==0.3.4
tensorflow>=1.12.0
1 change: 1 addition & 0 deletions setup.py
@@ -46,6 +46,7 @@ def calculate_version():
        'joblib>=0.10.3'],
    extras_require={
        'xgboost': ['xgboost==0.6a2'],
        'tensorflow': ['tensorflow>=1.12.0'],
        'skrebate': ['skrebate>=0.3.4'],
        'mdr': ['scikit-mdr>=0.4.4'],
        'dask': ['dask>=0.18.2',
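With this extra registered, the TensorFlow dependency can presumably be pulled in via pip's extras syntax, e.g. pip install tpot[tensorflow] (an assumption based on standard setuptools behavior, not stated in the commit itself).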
93 changes: 93 additions & 0 deletions tests/embedding_estimator_tests.py
@@ -0,0 +1,93 @@
from sklearn.datasets import make_classification, make_regression
from tpot.builtins import EmbeddingEstimator
from sklearn.neural_network import MLPClassifier, MLPRegressor


def test_EmbeddingClassifier_1():
    """Assert that Embedding for classification works as expected."""
    X, y = make_classification(random_state=1)
    cs = EmbeddingEstimator(MLPClassifier(random_state=1, tol=0.9))
    X_transformed = cs.fit_transform(X, y)

    # 20 features + 100 embedding size
    assert X_transformed.shape[1] == 120


def test_EmbeddingClassifier_2():
    """Assert that the correct embedding layer is selected (classifier)."""
    X, y = make_classification(random_state=1)
    cs = EmbeddingEstimator(
        MLPClassifier(hidden_layer_sizes=[20, 10], random_state=1, tol=0.9)
    )
    cs_2 = EmbeddingEstimator(
        MLPClassifier(hidden_layer_sizes=[20, 10], random_state=1, tol=0.9),
        embedding_layer=1,
    )
    X_transformed = cs.fit_transform(X, y)
    X_transformed_2 = cs_2.fit_transform(X, y)

    assert X_transformed.shape[1] == 30  # 20 features + 10 embedding size
    assert X_transformed_2.shape[1] == 40  # 20 features + 20 embedding size


def test_EmbeddingRegressor_1():
    """Assert that Embedding for regression works as expected."""
    X, y = make_regression(n_features=20, random_state=1)
    cs = EmbeddingEstimator(MLPRegressor(random_state=1, tol=1000))
    X_transformed = cs.fit_transform(X, y)

    # 20 features + 100 embedding size
    assert X_transformed.shape[1] == 120


def test_EmbeddingRegressor_2():
    """Assert that the correct embedding layer is selected (regressor)."""
    X, y = make_regression(n_features=20, random_state=1)
    cs = EmbeddingEstimator(
        MLPRegressor(hidden_layer_sizes=[20, 10], random_state=1, tol=1000)
    )
    cs_2 = EmbeddingEstimator(
        MLPRegressor(hidden_layer_sizes=[20, 10], random_state=1, tol=1000),
        embedding_layer=1,
    )
    X_transformed = cs.fit_transform(X, y)
    X_transformed_2 = cs_2.fit_transform(X, y)

    assert X_transformed.shape[1] == 30  # 20 features + 10 embedding size
    assert X_transformed_2.shape[1] == 40  # 20 features + 20 embedding size


def test_EmbeddingKeras():
    """Check that this also works for Keras models."""
    try:
        import tensorflow as tf
    except ImportError:
        tf = None
    if tf is None:
        return
    from tensorflow.keras import backend as K
    import tensorflow.keras as keras
    from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Activation

    def make_model(input_shape):
        model = Sequential()
        model.add(Dense(20, activation="relu", input_dim=input_shape))
        model.add(Dense(15, activation="relu"))
        model.add(Dense(2, activation="softmax"))
        model.compile(
            optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"]
        )
        return model

    X, y = make_classification(random_state=1)
    cs = EmbeddingEstimator(KerasClassifier(make_model), backend=K)
    cs_2 = EmbeddingEstimator(
        KerasClassifier(make_model), embedding_layer=-3, backend=K
    )
    X_transformed = cs.fit_transform(X, y, verbose=0)
    X_transformed_2 = cs_2.fit_transform(X, y, verbose=0)

    assert X_transformed.shape[1] == 35  # 20 features + 15 embedding size
    assert X_transformed_2.shape[1] == 40  # 20 features + 20 embedding size
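A note on the Keras layer indices above (inferred from the test itself, not stated in the commit): Sequential.layers does not include the input, so make_model yields [Dense(20), Dense(15), Dense(2)]. The default embedding_layer of -2 therefore selects the 15-unit layer (20 + 15 = 35 columns), while -3 selects the 20-unit layer (20 + 20 = 40 columns).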
1 change: 1 addition & 0 deletions tpot/builtins/__init__.py
Expand Up @@ -29,3 +29,4 @@
from .one_hot_encoder import OneHotEncoder, auto_select_categorical_features, _transform_selected
from .feature_transformers import CategoricalSelector, ContinuousSelector
from .feature_set_selector import FeatureSetSelector
from .embedding_estimator import EmbeddingEstimator
143 changes: 143 additions & 0 deletions tpot/builtins/embedding_estimator.py
@@ -0,0 +1,143 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""This file is part of the TPOT library.
TPOT was primarily developed at the University of Pennsylvania by:
- Randal S. Olson (rso@randalolson.com)
- Weixuan Fu (weixuanf@upenn.edu)
- Daniel Angell (dpa34@drexel.edu)
- and many more generous open source contributors
TPOT is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
TPOT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with TPOT. If not, see <http://www.gnu.org/licenses/>.
"""

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array
from sklearn.neural_network import MLPClassifier, MLPRegressor


class EmbeddingEstimator(TransformerMixin, BaseEstimator):
    """Meta-transformer for creating neural network embeddings as features."""

    def __init__(self, estimator, embedding_layer=None, backend=None):
        """Create an EmbeddingEstimator object.

        Parameters
        ----------
        estimator: neural network model, either scikit-learn or Keras-like.
            The estimator used to generate embeddings.
        embedding_layer: int, optional
            The layer whose activations are used as the embedding. By default
            the second-to-last layer is used. Layers are counted with the
            input layer as layer 0; negative indices are allowed.
        backend: optional
            The backend used to query the neural network. Not required when
            using the scikit-learn interface. Currently only a Keras-like
            interface (incl. tensorflow.keras) is supported.
        """
        second_last_layer = -2
        self.estimator = estimator
        self.embedding_layer = (
            second_last_layer if embedding_layer is None else embedding_layer
        )
        self.backend = backend

    def fit(self, X, y=None, **fit_params):
        """Fit the EmbeddingEstimator meta-transformer.

        Parameters
        ----------
        X: array-like of shape (n_samples, n_features)
            The training input samples.
        y: array-like, shape (n_samples,)
            The target values (integers that correspond to classes in
            classification, real numbers in regression).
        fit_params:
            Other estimator-specific parameters.

        Returns
        -------
        self: object
            Returns self, the fitted transformer.
        """
        if not isinstance(self.estimator, (MLPClassifier, MLPRegressor)):
            # Keras scikit-learn wrappers build their model lazily, so the
            # input shape is passed through sk_params before fitting.
            input_shape = X.shape[1]
            self.estimator.sk_params["input_shape"] = input_shape
            self.estimator.check_params(self.estimator.sk_params)
        self.estimator.fit(X, y, **fit_params)
        return self

    def transform(self, X):
        """Transform data by adding the embedding as features.

        Parameters
        ----------
        X: numpy ndarray, {n_samples, n_components}
            New data, where n_samples is the number of samples and
            n_components is the number of components.

        Returns
        -------
        X_transformed: array-like, shape (n_samples, n_features + embedding)
            The transformed feature set, where embedding is the size of the
            embedding layer.
        """
        X = check_array(X)
        X_transformed = np.copy(X)
        # prepend the embedding activations as synthetic features
        if isinstance(self.estimator, (MLPClassifier, MLPRegressor)):
            X_transformed = np.hstack(
                (self._embedding_mlp(self.estimator, X), X_transformed)
            )
        else:
            X_transformed = np.hstack(
                (self._embedding_keras(self.estimator, X), X_transformed)
            )

        return X_transformed

    def _embedding_mlp(self, estimator, X):
        # see also BaseMultilayerPerceptron._predict from
        # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/neural_network/multilayer_perceptron.py
        X = check_array(X, accept_sparse=["csr", "csc", "coo"])

        # Make sure hidden_layer_sizes is a list
        hidden_layer_sizes = estimator.hidden_layer_sizes
        if not hasattr(hidden_layer_sizes, "__iter__"):
            hidden_layer_sizes = [hidden_layer_sizes]
        hidden_layer_sizes = list(hidden_layer_sizes)

        layer_units = [X.shape[1]] + hidden_layer_sizes + [estimator.n_outputs_]

        # Initialize the activation storage for each layer
        activations = [X]

        for i in range(estimator.n_layers_ - 1):
            activations.append(np.empty((X.shape[0], layer_units[i + 1])))
        # forward propagate, then read off the requested layer
        estimator._forward_pass(activations)
        y_embedding = activations[self.embedding_layer]

        return y_embedding

    def _embedding_keras(self, estimator, X):
        X = check_array(X, accept_sparse=["csr", "csc", "coo"])
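        # Build a backend function from the model's input tensor to the
        # chosen layer's output; this assumes a Keras-like backend that
        # exposes backend.function (e.g. tensorflow.keras.backend).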
        get_embedding = self.backend.function(
            [estimator.model.layers[0].input],
            [estimator.model.layers[self.embedding_layer].output],
        )
        return get_embedding([X])[0]

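As an editorial illustration (not part of the commit), a minimal sketch of how the new transformer could slot into a scikit-learn pipeline; the make_pipeline layout and the LogisticRegression downstream model are assumptions, not TPOT API:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from tpot.builtins import EmbeddingEstimator

X, y = make_classification(random_state=1)

# Augment the 20 raw features with the 10-unit second-to-last hidden
# layer of a small MLP, then fit a downstream linear classifier on the
# widened (n_samples, 30) matrix.
pipeline = make_pipeline(
    EmbeddingEstimator(MLPClassifier(hidden_layer_sizes=[20, 10], random_state=1)),
    LogisticRegression(),
)
pipeline.fit(X, y)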