In [None]:
!pip install xgboost==1.6.1

Collecting xgboost==1.6.1
  Downloading xgboost-1.6.1-py3-none-manylinux2014_x86_64.whl (192.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m192.9/192.9 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xgboost
  Attempting uninstall: xgboost
    Found existing installation: xgboost 2.0.1
    Uninstalling xgboost-2.0.1:
      Successfully uninstalled xgboost-2.0.1
Successfully installed xgboost-1.6.1


In [1]:
import xgboost
from sklearn.metrics import f1_score

xgboost.__version__

'2.0.1'

In [2]:
from sklearn.base import BaseEstimator
from xgboost import XGBRegressor, XGBClassifier

class XGBoostWithEarlyStop(BaseEstimator):
    def __init__(self, test_size=0.2, **estimator_params):
        self.test_size = test_size
        if self.estimator is not None:
            self.set_params(**estimator_params)

    def set_params(self, **params):
        return self.estimator.set_params(**params)

    def get_params(self, **params):
        return self.estimator.get_params()

    def fit(self, X, y):
        x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=self.test_size)
        self.estimator.fit(x_train, y_train,
                           eval_set=[(x_train, y_train), (x_val, y_val)])
        return self

    def predict(self, X):
        return self.estimator.predict(X)

    def predict_proba(self, X):
        return self.estimator.predict_proba(X)

class XGBoostRegressorWithEarlyStop(XGBoostWithEarlyStop):
    def __init__(self, *args, **kwargs):
        self.estimator = XGBRegressor()
        self._estimator_type = 'regressor'
        super(XGBoostRegressorWithEarlyStop, self).__init__(*args, **kwargs)

class XGBoostClassifierWithEarlyStop(XGBoostWithEarlyStop):
    def __init__(self, *args, **kwargs):
        self.estimator = XGBClassifier()
        self._estimator_type = 'classifier'
        super(XGBoostClassifierWithEarlyStop, self).__init__(*args, **kwargs)


In [3]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier


X, y = load_breast_cancer(return_X_y=True, as_frame=True)

# Split your data into training and validation sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

In [6]:
gbc = GradientBoostingClassifier(n_estimators=100, validation_fraction=.2)
lgc = LogisticRegression()
scaler = StandardScaler().set_output(transform='pandas')

In [22]:
p1 = Pipeline([
    ('scaler', scaler),
    ('gbc', gbc)
])
p2 = Pipeline([
    ('scaler', scaler),
    ('lgc', lgc)
])

voting_classifier = VotingClassifier(
    estimators=[
        ('pipe1', p1),
        ('pipe2', p2)
        ],
    voting='soft'
)

voting_classifier

In [24]:
voting_classifier.fit(X_train, y_train)

In [25]:
y_pred = voting_classifier.predict(X_test)

f1_score(y_true=y_test, y_pred=y_pred)

0.923076923076923

In [26]:
xgb = XGBClassifier(n_estimators=10000, early_stopping_rounds=10)
xgb.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)])

[0]	validation_0-logloss:0.47369	validation_1-logloss:0.48629
[1]	validation_0-logloss:0.35799	validation_1-logloss:0.38499
[2]	validation_0-logloss:0.27593	validation_1-logloss:0.32306
[3]	validation_0-logloss:0.21821	validation_1-logloss:0.26930
[4]	validation_0-logloss:0.17378	validation_1-logloss:0.22892
[5]	validation_0-logloss:0.14589	validation_1-logloss:0.20783
[6]	validation_0-logloss:0.12209	validation_1-logloss:0.19011
[7]	validation_0-logloss:0.10632	validation_1-logloss:0.17637
[8]	validation_0-logloss:0.09271	validation_1-logloss:0.16575
[9]	validation_0-logloss:0.08098	validation_1-logloss:0.15818
[10]	validation_0-logloss:0.07135	validation_1-logloss:0.15290
[11]	validation_0-logloss:0.06396	validation_1-logloss:0.14793
[12]	validation_0-logloss:0.05739	validation_1-logloss:0.13669
[13]	validation_0-logloss:0.05237	validation_1-logloss:0.13677
[14]	validation_0-logloss:0.04794	validation_1-logloss:0.12918
[15]	validation_0-logloss:0.04450	validation_1-logloss:0.12895
[1

In [27]:
y_pred = xgb.predict(X_test)

f1_score(y_true=y_test, y_pred=y_pred)

0.9688581314878894

In [28]:
voting_classifier.estimators.append(
    ['xgb', xgb]
)

In [29]:
voting_classifier

In [31]:
y_pred = voting_classifier.predict(X_test)
f1_score(y_true=y_test, y_pred=y_pred)

0.923076923076923

In [32]:
import pickle

In [None]:
xgboost.__version__

'1.6.1'

In [None]:
pickle.dump(pipe, open('./test.pkl', 'wb'))

In [None]:
p = pickle.load(open('./test.pkl', 'rb'))
p