<a href="https://colab.research.google.com/github/JaehyunAhn/AI_for_Education/blob/master/VotingClassifier_ensemble_with_earlystopping_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [148]:
from sklearn.base import BaseEstimator
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor, XGBClassifier

class XGBoostWithEarlyStop(BaseEstimator):
    def __init__(self, test_size=0.2, **estimator_params):
        self.test_size = test_size
        if self.estimator is not None:
            self.set_params(**estimator_params)

    def set_params(self, **params):
        return self.estimator.set_params(**params)

    def get_params(self, **params):
        return self.estimator.get_params()

    def fit(self, X, y):
        x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=self.test_size)
        self.estimator.fit(x_train, y_train,
                           eval_set=[(x_train, y_train), (x_val, y_val)])
        return self

    def predict(self, X):
        return self.estimator.predict(X)

    def predict_proba(self, X):
        return self.estimator.predict_proba(X)

class XGBoostRegressorWithEarlyStop(XGBoostWithEarlyStop):
    def __init__(self, *args, **kwargs):
        self.estimator = XGBRegressor()
        self._estimator_type = 'regressor'
        super(XGBoostRegressorWithEarlyStop, self).__init__(*args, **kwargs)

class XGBoostClassifierWithEarlyStop(XGBoostWithEarlyStop):
    def __init__(self, *args, **kwargs):
        self.estimator = XGBClassifier()
        self._estimator_type = 'classifier'
        super(XGBoostClassifierWithEarlyStop, self).__init__(*args, **kwargs)


In [154]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LogisticRegression


X, y = load_breast_cancer(return_X_y=True)

# Split your data into training and validation sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

# Define your preprocessing steps in the pipeline
scaler = StandardScaler()  # You can replace this with any preprocessing steps you need

xgb_model = XGBoostClassifierWithEarlyStop(
    objective="binary:logistic",
    eval_metric="aucpr",
    n_estimators=600,  # Set a large number
    max_depth=6,
    learning_rate=0.1,
    verbose_eval=True,
    early_stopping_rounds=3
)

logistic_model = LogisticRegression()

# Create the VotingClassifier with only the XGBoost model

voting_classifier = VotingClassifier(
    estimators=[
        ('xgb', xgb_model),
        ('lgr', logistic_model)
        ],
    voting='soft'
)

pipe = Pipeline(
    [
        ('scaler', scaler),
        ('clf', voting_classifier)
    ]
)

# Fit the VotingClassifier with training data
pipe.fit(X_train, y_train)

# Use the VotingClassifier to make predictions on the validation data
y_pred = pipe.predict(X_test)

[0]	validation_0-aucpr:0.99600	validation_1-aucpr:0.90025
[1]	validation_0-aucpr:0.99801	validation_1-aucpr:0.89746
[2]	validation_0-aucpr:0.99817	validation_1-aucpr:0.89746
[3]	validation_0-aucpr:0.99740	validation_1-aucpr:0.89746


Parameters: { "verbose_eval" } are not used.



In [158]:
# xgb_model.set_params(early_stopping_rounds=7)
# xgb_model.fit(X_train, y_train)

In [156]:
# xgb_model.get_params()

In [157]:
from sklearn.metrics import f1_score

f1_score(y_true=y_test, y_pred=y_pred)

0.9846153846153847