# XGBoost with Scikit-learn API

XGBoost provides full compatibility with Scikit-learn via:
- `XGBRegressor` and `XGBClassifier`
- Integration with `GridSearchCV` and `RandomizedSearchCV`
- Cross-validation and pipelines


In [13]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes, load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score
from xgboost import XGBRegressor, XGBClassifier, XGBRFRegressor

import warnings
warnings.filterwarnings('ignore')

## Regression with GridSearchCV

In [14]:
# Regression Example
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_reg = XGBRegressor(random_state=42)

param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [3, 4, 5],
    'learning_rate': [0.01, 0.1],
    'subsample': [0.8, 1.0]
}

grid = GridSearchCV(xgb_reg, param_grid, scoring='neg_root_mean_squared_error', cv=3, n_jobs=-1)
grid.fit(X_train, y_train)

print("Best Params:", grid.best_params_)
print("Best RMSE:", -grid.best_score_)

Best Params: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Best RMSE: 59.020226760718


# Classification with RandomSearchCV

In [15]:
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_clf = XGBClassifier(random_state=42)

param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [2, 3, 4, 5],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0]
}

search = RandomizedSearchCV(xgb_clf, param_distributions=param_dist, n_iter=10, cv=3, n_jobs=-1, random_state=42)
search.fit(X_train, y_train)

print("Best Params:", search.best_params_)
print("Best Accuracy:", search.best_score_)

Best Params: {'subsample': 0.8, 'n_estimators': 50, 'max_depth': 4, 'learning_rate': 0.01}
Best Accuracy: 0.9583333333333334


# XGBRFRegressor (Random Forest Style)

In [16]:
rf_reg = XGBRFRegressor(n_estimators=200, max_depth=4, learning_rate=0.1, subsample=0.8, random_state=42)
rf_reg.fit(X_train, y_train)
rf_preds = rf_reg.predict(X_test)
rmse_rf = mean_squared_error(y_test, rf_preds, squared=False)

print(f"XGBRFRegressor RMSE: {rmse_rf:.3f}")

XGBRFRegressor RMSE: 0.754


### Key Points

- `XGBRegressor` and `XGBClassifier` follow Scikit-learn conventions.
- Work seamlessly with cross-validation and hyperparameter tuning.
- `XGBRFRegressor` offers a random forest-style ensemble built on the same XGBoost engine.
