In [30]:
from sklearn.compose import TransformedTargetRegressor

import warnings

import numpy as np

from sklearn.base import BaseEstimator, RegressorMixin, clone
from sklearn.utils.validation import check_is_fitted
from sklearn.utils import check_array, _safe_indexing
from sklearn.preprocessing import FunctionTransformer
from sklearn.utils.validation import _deprecate_positional_args
from sklearn.exceptions import NotFittedError

class TransformedTargetClassifier(TransformedTargetRegressor):
    """Fit an estimator on a transformed target, allowing non-numeric labels.

    Subclass of ``TransformedTargetRegressor`` that overrides ``fit`` and
    ``_fit_transformer`` so that ``y`` is validated with ``dtype=None``,
    i.e. its original dtype (for example strings) is kept rather than being
    coerced to a numeric dtype. ``predict`` and the constructor are inherited
    unchanged from the parent class.
    """

    def _fit_transformer(self, y):
        """Check transformer and fit transformer.

        Create the default transformer, fit it and make additional inverse
        check on a subset (optional).

        Parameters
        ----------
        y : ndarray of shape (n_samples, n_outputs)
            Two-dimensional target, as prepared by ``fit``.

        Raises
        ------
        ValueError
            If both ``transformer`` and ``func``/``inverse_func`` are set, or
            if ``func`` is given without ``inverse_func``.
        """
        if (self.transformer is not None and
                (self.func is not None or self.inverse_func is not None)):
            raise ValueError("'transformer' and functions 'func'/"
                             "'inverse_func' cannot both be set.")
        elif self.transformer is not None:
            # Work on a clone so the user-supplied transformer stays unfitted.
            self.transformer_ = clone(self.transformer)
        else:
            if self.func is not None and self.inverse_func is None:
                raise ValueError("When 'func' is provided, 'inverse_func' must"
                                 " also be provided")
            self.transformer_ = FunctionTransformer(
                func=self.func, inverse_func=self.inverse_func, validate=True,
                check_inverse=self.check_inverse)
        # XXX: sample_weight is not currently passed to the
        # transformer. However, if transformer starts using sample_weight, the
        # code should be modified accordingly. At the time to consider the
        # sample_prop feature, it is also a good use case to be considered.
        self.transformer_.fit(y)
        if self.check_inverse:
            # Round-trip roughly every tenth sample (every sample when there
            # are fewer than ten) through transform / inverse_transform.
            idx_selected = slice(None, None, max(1, y.shape[0] // 10))
            y_sel = _safe_indexing(y, idx_selected)
            y_sel_t = self.transformer_.transform(y_sel)
            y_sel_back = self.transformer_.inverse_transform(y_sel_t)
            # np.allclose only works on numeric data and raises TypeError on
            # string/object labels, so those are compared for exact equality.
            numeric_kinds = "biufc"
            both_numeric = (
                np.asarray(y_sel).dtype.kind in numeric_kinds
                and np.asarray(y_sel_back).dtype.kind in numeric_kinds)
            if both_numeric:
                is_inverse = np.allclose(y_sel, y_sel_back)
            else:
                is_inverse = np.array_equal(y_sel, y_sel_back)
            if not is_inverse:
                warnings.warn("The provided functions or transformer are"
                              " not strictly inverse of each other. If"
                              " you are sure you want to proceed regardless"
                              ", set 'check_inverse=False'", UserWarning)

    def fit(self, X, y, **fit_params):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like of shape (n_samples,)
            Target values. The dtype is preserved, so labels do not have to
            be numeric as long as the transformer can handle them.
        **fit_params : dict
            Parameters passed to the ``fit`` method of the underlying
            regressor.

        Returns
        -------
        self : object
        """
        # dtype=None keeps the labels' original dtype (e.g. strings).
        # The finiteness check is left at its default (enabled); the keyword
        # is not passed explicitly because its name differs between
        # scikit-learn versions (force_all_finite -> ensure_all_finite).
        y = check_array(y, accept_sparse=False, ensure_2d=False, dtype=None)

        # store the number of dimension of the target to predict an array of
        # similar shape at predict
        self._training_dim = y.ndim

        # transformers are designed to modify X which is 2d dimensional, we
        # need to modify y accordingly.
        if y.ndim == 1:
            y_2d = y.reshape(-1, 1)
        else:
            y_2d = y
        self._fit_transformer(y_2d)

        # transform y and convert back to 1d array if needed
        y_trans = self.transformer_.transform(y_2d)
        # FIXME: a FunctionTransformer can return a 1D array even when validate
        # is set to True. Therefore, we need to check the number of dimension
        # first.
        if y_trans.ndim == 2 and y_trans.shape[1] == 1:
            y_trans = y_trans.squeeze(axis=1)

        if self.regressor is None:
            # Absolute import: the relative form only resolves from inside the
            # scikit-learn package itself, not from a notebook or script.
            # NOTE(review): the default is still a regressor (kept for parity
            # with the parent class) - consider a classifier default instead.
            from sklearn.linear_model import LinearRegression
            self.regressor_ = LinearRegression()
        else:
            self.regressor_ = clone(self.regressor)

        self.regressor_.fit(X, y_trans, **fit_params)

        return self

In [8]:
"""
Load the iris dataset, which serves as the example classification problem
"""
from sklearn.datasets import load_iris
iris = load_iris()

In [18]:
"""
Split the loaded dataset into the feature matrix X and the label vector Y
"""
X, Y = iris["data"], iris["target"]

In [46]:
from sklearn.model_selection import train_test_split
# A fixed random_state makes the split, and every result derived from it,
# identical on each Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=42)

"""
create alternate target set where integers are transformed to strings: 0 = 'a', 1 = 'b' etc.
"""
# chr(97) == 'a', so label i maps to the i-th lowercase letter.
Y_train_string = np.array([chr(97 + i) for i in Y_train])
Y_test_string = np.array([chr(97 + i) for i in Y_test])

In [36]:
import numpy as np
from xgboost import XGBClassifier

# With use_label_encoder=False the classifier expects integer labels
# 0, 1, ..., n-1; the failing fit on string labels in a later cell shows this.
classifier = XGBClassifier(use_label_encoder=False)

In [51]:
"""
Baseline: a valid fit on the integer labels, followed by predictions on the test features
"""
classifier.fit(X_train, Y_train)
predictions = classifier.predict(X_test)



In [55]:
"""
Fails because XGBoost expects integer labels 0,1,...,n-1 when use_label_encoder=False
Try it out!
"""
# The error is the point of this cell, so it is caught and displayed instead
# of being left uncaught, which would abort a Restart & Run All.
try:
    classifier.fit(X_train, Y_train_string)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

  return bool(asarray(a1 == a2).all())


ValueError: The label must consist of integer labels of form 0, 1, 2, ..., [num_class - 1].

In [52]:
"""
Combine our XGBoost classifier with a transformer that maps string labels to integers 0,1,...,n-1.
We use TransformedTargetClassifier, which inherits from TransformedTargetRegressor.
"""
# OrdinalEncoder was used without ever being imported, which raises NameError
# on a fresh kernel.
from sklearn.preprocessing import OrdinalEncoder

transformer = OrdinalEncoder()
# check_inverse=False skips the optional round-trip check of the transformer.
transformed_classifier = TransformedTargetClassifier(
    regressor=classifier,
    transformer=transformer,
    check_inverse=False,
)

In [57]:
# Fit the wrapped classifier on the string labels, then predict on the test features.
transformed_classifier.fit(X_train, Y_train_string)
predictions_string = transformed_classifier.predict(X_test)



In [60]:
"""
Apply the same integer-to-string mapping (0 = 'a', 1 = 'b', ...) to the baseline predictions
"""
predictions_transformed = np.array(
    [chr(ord("a") + label) for label in predictions]
)

In [64]:
"""
Check whether the direct predictions are identical to the predictions made via TransformedTargetClassifier
"""
elementwise_match = predictions_string == predictions_transformed
np.all(elementwise_match)

True