In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb

# Kept ahead of the scikit-learn / joblib imports, exactly as in the original cell.
warnings.filterwarnings('always')

from joblib import Parallel
from sklearn.base import is_classifier
from sklearn.datasets import make_multilabel_classification, make_regression
from sklearn.metrics import mean_absolute_error, classification_report
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier
from sklearn.multioutput import _fit_estimator, check_classification_targets
from sklearn.utils.fixes import delayed
from sklearn.utils.validation import _check_fit_params
from sklearn.utils.validation import has_fit_parameter

In [2]:
class CustomMultiOutputRegressor(MultiOutputRegressor):
    """Multi-output regressor that gives every sub-estimator its own ``eval_set``.

    One clone of ``self.estimator`` is fitted per column of ``y``. If an
    ``eval_set`` fit parameter is supplied, its multi-column targets are sliced
    so that the estimator for column ``i`` only sees column ``i`` of each
    evaluation target.
    """

    def fit(self, X, y, sample_weight=None, **fit_params):
        """Fit one estimator per target column.

        Parameters
        ----------
        X : array-like or sparse matrix
            Training features.
        y : array-like with two dimensions
            Training targets, one column per output.
        sample_weight : array-like, optional
            Forwarded to every sub-estimator's ``fit``.
        **fit_params
            Extra keyword arguments forwarded to every sub-estimator's ``fit``.
            ``eval_set`` is optional; when given it must be an iterable of
            ``(X_eval, Y_eval)`` pairs where each ``Y_eval`` has the same
            number of columns as ``y``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the base estimator has no ``fit`` method, ``y`` is 1-D,
            ``sample_weight`` is given but unsupported by the base estimator,
            or an ``eval_set`` target does not match the shape of ``y``.
        """
        if not hasattr(self.estimator, "fit"):
            raise ValueError("The base estimator should implement a fit method")
        X, y = self._validate_data(X, y, force_all_finite=False, multi_output=True, accept_sparse=True)
        if y.ndim == 1:
            raise ValueError("y must have at least two dimensions for multi-output regression but has only one.")
        if sample_weight is not None and not has_fit_parameter(self.estimator, 'sample_weight'):
            raise ValueError("Underlying estimator does not support sample weights.")

        fit_params_validated = _check_fit_params(X, fit_params)
        n_targets = y.shape[1]

        # eval_set is optional and may hold any number of (X, Y) pairs.
        eval_set = fit_params_validated.pop('eval_set', None)
        if eval_set is not None:
            # Convert targets once so column slicing also works for DataFrames and lists.
            eval_set = [(X_eval, np.asarray(Y_eval)) for X_eval, Y_eval in eval_set]
            for _, Y_eval in eval_set:
                if Y_eval.ndim != 2 or Y_eval.shape[1] != n_targets:
                    raise ValueError("Every eval_set target must be 2-D with %d columns to match y."
                                     % n_targets)

        def params_for_target(i):
            # Shared fit params plus, when present, the eval targets narrowed to column i.
            if eval_set is None:
                return fit_params_validated
            return dict(fit_params_validated,
                        eval_set=[(X_eval, Y_eval[:, i]) for X_eval, Y_eval in eval_set])

        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_estimator)(self.estimator, X, y[:, i], sample_weight, **params_for_target(i))
            for i in range(n_targets)
        )
        return self

# Regression model

In [3]:
# Synthetic multi-target regression problem (seeded so the cell is reproducible).
data, label = make_regression(
    n_samples=15000,
    n_features=50,
    n_targets=10,
    random_state=0,
)

# First hold out 25% of the rows as the test set ...
X, X_test, y, y_test = train_test_split(
    data, label, test_size=0.25, random_state=0
)
# ... then split the remainder evenly into training and validation sets.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.5, random_state=0
)

# Shapes of every split, features first and targets second.
print(*(part.shape for part in (X_train, X_val, X_test, y_train, y_val, y_test)))

(5625, 50) (5625, 50) (3750, 50) (5625, 10) (5625, 10) (3750, 10)


In [4]:
# XGBoost treats 'eta' as an alias of 'learning_rate'; the original dict set both
# (0.3 and 0.01), which makes the effective step size ambiguous. Only one is kept.
# NOTE(review): 0.01 is kept on the assumption that the explicit learning_rate was
# the intended value -- confirm.
params = {'max_depth': 5,
          'objective': 'reg:squarederror',
          'learning_rate': 0.01,
          'n_estimators': 1000}

# One XGBRegressor is trained per target column; each stops early on its own
# column of the validation targets (the last pair in eval_set).
reg = CustomMultiOutputRegressor(xgb.XGBRegressor(**params)).fit(X_train,
                                                                 y_train,
                                                                 early_stopping_rounds = 5,
                                                                 eval_metric = ['rmse'],
                                                                 eval_set = [(X_train, y_train), (X_val, y_val)],
                                                                 verbose = False)

# Classification model

In [5]:
class CustomMultiOutputClassifier(MultiOutputClassifier):
    """Multi-output classifier that gives every sub-estimator its own ``eval_set``.

    One clone of ``self.estimator`` is fitted per column of ``y``. If an
    ``eval_set`` fit parameter is supplied, its multi-column targets are sliced
    so that the estimator for column ``i`` only sees column ``i`` of each
    evaluation target.
    """

    def fit(self, X, y, sample_weight=None, **fit_params):
        """Fit one classifier per target column.

        Parameters
        ----------
        X : array-like or sparse matrix
            Training features.
        y : array-like with two dimensions
            Training labels, one column per output.
        sample_weight : array-like, optional
            Forwarded to every sub-estimator's ``fit``.
        **fit_params
            Extra keyword arguments forwarded to every sub-estimator's ``fit``.
            ``eval_set`` is optional; when given it must be an iterable of
            ``(X_eval, Y_eval)`` pairs where each ``Y_eval`` has the same
            number of columns as ``y``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the base estimator has no ``fit`` method, ``y`` is 1-D,
            ``sample_weight`` is given but unsupported by the base estimator,
            or an ``eval_set`` target does not match the shape of ``y``.
        """
        if not hasattr(self.estimator, "fit"):
            raise ValueError("The base estimator should implement a fit method")
        X, y = self._validate_data(X, y, force_all_finite=False, multi_output=True, accept_sparse=True)
        check_classification_targets(y)
        if y.ndim == 1:
            raise ValueError("y must have at least two dimensions for multi-output classification but has only one.")
        if sample_weight is not None and not has_fit_parameter(self.estimator, 'sample_weight'):
            raise ValueError("Underlying estimator does not support sample weights.")

        fit_params_validated = _check_fit_params(X, fit_params)
        n_targets = y.shape[1]

        # eval_set is optional and may hold any number of (X, Y) pairs.
        eval_set = fit_params_validated.pop('eval_set', None)
        if eval_set is not None:
            # Convert targets once so column slicing also works for DataFrames and lists.
            eval_set = [(X_eval, np.asarray(Y_eval)) for X_eval, Y_eval in eval_set]
            for _, Y_eval in eval_set:
                if Y_eval.ndim != 2 or Y_eval.shape[1] != n_targets:
                    raise ValueError("Every eval_set target must be 2-D with %d columns to match y."
                                     % n_targets)

        def params_for_target(i):
            # Shared fit params plus, when present, the eval targets narrowed to column i.
            if eval_set is None:
                return fit_params_validated
            return dict(fit_params_validated,
                        eval_set=[(X_eval, Y_eval[:, i]) for X_eval, Y_eval in eval_set])

        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_estimator)(self.estimator, X, y[:, i], sample_weight, **params_for_target(i))
            for i in range(n_targets)
        )

        # Expose per-output class labels when the fitted sub-estimators provide them.
        if all(hasattr(fitted, "classes_") for fitted in self.estimators_):
            self.classes_ = [fitted.classes_ for fitted in self.estimators_]
        return self

In [6]:
# Synthetic multilabel problem (seeded so the cell is reproducible).
data, label = make_multilabel_classification(
    n_samples=15000,
    n_features=50,
    n_classes=10,
    n_labels=1,
    random_state=0,
)

# First hold out 25% of the rows as the test set ...
X, X_test, y, y_test = train_test_split(
    data, label, test_size=0.25, random_state=0
)
# ... then split the remainder evenly into training and validation sets.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.5, random_state=0
)

# Shapes of every split, features first and targets second.
print(*(part.shape for part in (X_train, X_val, X_test, y_train, y_val, y_test)))

(5625, 50) (5625, 50) (3750, 50) (5625, 10) (5625, 10) (3750, 10)


In [7]:
# XGBoost treats 'eta' as an alias of 'learning_rate'; the original dict set both
# (0.3 and 0.01), which makes the effective step size ambiguous. Only one is kept.
# NOTE(review): 0.01 is kept on the assumption that the explicit learning_rate was
# the intended value -- confirm.
# NOTE(review): each sub-classifier is trained with 'multi:softmax' and num_class=2;
# if every label column is binary, 'binary:logistic' with 'error'/'logloss' may be
# the more conventional setup -- confirm before changing.
params = {'max_depth': 5,
          'objective': 'multi:softmax',
          'num_class': 2,
          'learning_rate': 0.01,
          'use_label_encoder': False,
          'n_estimators': 1000}

# One XGBClassifier is trained per label column; each stops early on its own
# column of the validation labels (the last pair in eval_set).
cls = CustomMultiOutputClassifier(xgb.XGBClassifier(**params)).fit(X_train,
                                                                   y_train,
                                                                   early_stopping_rounds = 5,
                                                                   eval_metric = ['merror','mlogloss'],
                                                                   eval_set = [(X_train, y_train), (X_val, y_val)],
                                                                   verbose = False)