# AutoML: Regression (XGBoost)

This template trains a regression model using XGBoost.

- Parameters are injected via Papermill.
- Data is loaded via `load_dataset_as_dataframe()`.
- Model artifacts are registered via MLModelRegistry on a best-effort basis: if registration fails, the error is printed and the notebook continues.



In [None]:
# Parameters (Papermill)
#
# Default values for the notebook inputs; Papermill injects the real values
# at execution time.

dataset_id = ""  # UUID string; passed to load_dataset_as_dataframe()
target_column = "target"  # column to predict; must exist in the loaded DataFrame
feature_columns = []  # list[str]; empty means infer numeric columns
test_size = 0.2  # forwarded (as float) to train_test_split(test_size=...)
random_state = 42  # seeds both the train/test split and the XGBoost regressor
model_name = "automl_regression_xgboost"  # name passed to register_trained_model()



In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

from amprenta_rag.notebook.automl_helpers import (
    generate_regression_report,
    load_dataset_as_dataframe,
    register_trained_model,
)

# xgboost is a hard requirement for this template: any exception raised while
# importing it is re-raised as ImportError, with the original exception chained
# as the cause so the underlying failure stays visible in the traceback.
try:
    import xgboost as xgb
except Exception as e:
    raise ImportError("xgboost is required for this template") from e



In [None]:
# Load the dataset, assemble the feature matrix and target, fit an XGBoost
# regressor on the training split and score it on the held-out split.
df = load_dataset_as_dataframe(dataset_id)

if not target_column or target_column not in df.columns:
    raise ValueError(f"target_column '{target_column}' missing")

if feature_columns:
    # Accept a single column name passed as a bare string as well as a list.
    if isinstance(feature_columns, str):
        requested_features = [feature_columns]
    else:
        requested_features = list(feature_columns)

    # Report every unknown column at once instead of failing with a KeyError.
    missing_features = [col for col in requested_features if col not in df.columns]
    if missing_features:
        raise ValueError(f"feature_columns not found in dataset: {missing_features}")

    # Never train on the target itself (label leakage); this mirrors the
    # inferred branch below, which also drops the target column.
    selected_features = [col for col in requested_features if col != target_column]
    X = df[selected_features]
else:
    # No explicit features: use every numeric column except the target.
    X = df.select_dtypes(include=["number"]).drop(columns=[target_column], errors="ignore")

# Fail early with a clear message rather than deep inside the model fit.
if X.shape[1] == 0:
    raise ValueError("no feature columns available for training")

y = df[target_column].astype(float)

# Coerce the split parameters to the numeric types they are used as here.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=float(test_size),
    random_state=int(random_state),
)

# Fixed hyperparameters for this template; random_state is shared with the
# split above.
reg = xgb.XGBRegressor(
    n_estimators=500,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    reg_lambda=1.0,
    random_state=int(random_state),
)
reg.fit(X_train, y_train)

# Evaluate on the held-out split only.
pred = reg.predict(X_test)
metrics = generate_regression_report(y_test, pred)

# Bare trailing expression so the notebook renders the metrics as cell output.
metrics



In [None]:
# Register model in MLModelRegistry (best-effort)
#
# Registration must never fail the notebook run: any exception is reported and
# the run continues with reg_entry left as None.
reg_entry = None
try:
    reg_entry = register_trained_model(
        reg,
        name=model_name,
        metrics={k: float(v) for k, v in metrics.items()},
        dataset_id=dataset_id,
        model_type="automl_regression",
        framework="xgboost",
        features=list(X.columns),
        hyperparameters=getattr(reg, "get_params", lambda: {})(),
        description="AutoML regression template (XGBoost)",
    )
except Exception as e:
    print(f"Model registry not available in this environment: {e}")

# Echo the registry entry as the cell's last top-level expression; an
# expression inside the try body is not rendered by the notebook. Nothing is
# shown when registration failed (reg_entry is None).
reg_entry

