## Container Runtime HPO Example
This example notebook demonstrates how to use the Container Runtime hyperparameter optimization (HPO) API to train a simple XGBoost model with Bayesian optimization, random search, and grid search. It highlights both single-node and multi-node HPO—powered by the same API—with multi-node support enabled through an optional `scale_cluster` call. The notebook also shows how to retrieve and analyze HPO results via the API.

In [1]:
import xgboost as xgb
from snowflake.ml.data.data_connector import DataConnector
from snowflake.ml.modeling import tune
from snowflake.ml.modeling.tune import get_tuner_context
from sklearn import datasets
from snowflake.ml.modeling.tune.search import BayesOpt, RandomSearch, GridSearch
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from snowflake.snowpark.context import get_active_session

### Data Ingestion & Define Training Function

In [None]:
######### STEP 0: FOLLOWING CODE SHOULD ALREADY BE AUTO-GENERATED IN SNOWFLAKE NOTEBOOK ##########
# Reuse the session that the notebook environment already has active.
session = get_active_session()

######### STEP 1: LOAD SAMPLE TRAINING DATA FOR ILLUSTRATION PURPOSES ##########
# scikit-learn's digits dataset, returned as pandas objects (as_frame=True).
X, y = datasets.load_digits(return_X_y=True, as_frame=True)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Attach the labels as a "target" column so that each split travels as a single table.
X_train = X_train.assign(target=y_train).reset_index(drop=True)
X_test = X_test.assign(target=y_test).reset_index(drop=True)

# Turn each split into a Snowpark DataFrame wrapped in a DataConnector, keyed by split name.
# The training function looks the splits up again under the same "train" / "test" keys.
dataset_map = {
    split_name: DataConnector.from_dataframe(session.create_dataframe(split_frame))
    for split_name, split_frame in (("train", X_train), ("test", X_test))
}

######### STEP 2: DEFINE TRAINING FUNCTION ##########
def train_func():
    """Run one HPO trial: fit an XGBoost classifier and report its test accuracy.

    Takes no arguments. The trial's hyperparameters and the dataset map are read
    from the tuner context. The model is fitted on the "train" split, scored on
    the "test" split, and the result is handed back through
    ``tuner_context.report`` as ``{"accuracy": ...}`` together with the fitted
    model.
    """
    # Hyperparameters that must reach XGBoost as integers. The search spaces in this
    # notebook draw them from continuous ranges (tune.uniform), so they are cast here.
    # Only these names are cast: a blanket cast of everything except learning_rate
    # would silently truncate any other float-valued parameter (e.g. subsample=0.8 -> 0)
    # that is later added to the search space.
    integer_params = {"n_estimators", "max_depth"}

    tuner_context = get_tuner_context()
    config = tuner_context.get_hyper_params()
    dm = tuner_context.get_dataset_map()

    train_df = dm["train"].to_pandas()
    test_df = dm["test"].to_pandas()

    # The label column is addressed with the double quotes embedded in its name.
    # NOTE(review): presumably this is how the lowercase "target" column comes back
    # from the Snowflake round trip -- confirm if the column lookup ever fails.
    target_col = '"target"'
    train_labels = train_df[target_col]
    train_features = train_df.drop(columns=[target_col])
    test_labels = test_df[target_col]
    test_features = test_df.drop(columns=[target_col])

    hyper_params = {
        name: int(value) if name in integer_params else value
        for name, value in config.items()
    }
    model = xgb.XGBClassifier(**hyper_params, random_state=42)
    model.fit(train_features, train_labels)

    accuracy = accuracy_score(test_labels, model.predict(test_features))

    # The metric key must match the `metric` named in TunerConfig ("accuracy").
    tuner_context.report(metrics={"accuracy": accuracy}, model=model)

### [OPTIONAL STEP] Scale Up Cluster To Enable Multi-Node HPO

In [None]:
from snowflake.ml.runtime_cluster import scale_cluster

# Optional step: skip this cell to keep the HPO runs below on a single node.
TARGET_NODE_COUNT = 2  # scale up from single node to two nodes
scale_cluster(TARGET_NODE_COUNT)

### Bayesian Optimization Search

In [None]:
######### STEP 3: START HPO RUN With Bayes Opt Search ##########
# Continuous ranges to explore for each hyperparameter.
bayes_search_space = {
    "n_estimators": tune.uniform(50, 200),
    "max_depth": tune.uniform(3, 10),
    "learning_rate": tune.uniform(0.01, 0.3),
}

# Maximize the "accuracy" metric that train_func reports.
bayes_tuner_config = tune.TunerConfig(
    metric="accuracy",
    mode="max",
    search_alg=BayesOpt(),
    num_trials=3,  # Increase num_trials for broader exploration and potentially better model performance
)

tuner = tune.Tuner(
    train_func=train_func,
    search_space=bayes_search_space,
    tuner_config=bayes_tuner_config,
)

tuner_results = tuner.run(dataset_map=dataset_map)

In [None]:
######### STEP 4: EVALUATE THE HPO RUN RESULT ##########

# `tuner_results` here is the return value of the Bayesian-optimization run above.
# NOTE(review): `best_result` is presumably the best trial under metric="accuracy",
# mode="max" -- confirm the exact contents against the HPO API documentation.
tuner_results.best_result

In [None]:
# NOTE(review): presumably the model object that the best trial passed to
# tuner_context.report(model=...) -- confirm against the HPO API documentation.
tuner_results.best_model

### Random Search

In [None]:
######### START HPO RUN With Random Search ##########

# Continuous ranges to sample from for each hyperparameter.
random_search_space = {
    "n_estimators": tune.uniform(50, 200),
    "max_depth": tune.uniform(3, 10),
    "learning_rate": tune.uniform(0.01, 0.3),
}

# Maximize the "accuracy" metric that train_func reports.
random_tuner_config = tune.TunerConfig(
    metric="accuracy",
    mode="max",
    search_alg=RandomSearch(),
    num_trials=3,  # Increase num_trials for broader exploration and potentially better model performance
)

tuner = tune.Tuner(
    train_func=train_func,
    search_space=random_search_space,
    tuner_config=random_tuner_config,
)

# Note: this rebinds `tuner_results`, replacing the results of the previous run.
tuner_results = tuner.run(dataset_map=dataset_map)

In [None]:
# `tuner_results` now refers to the random-search run (the previous value was overwritten).
# NOTE(review): `results` presumably holds one entry per trial -- confirm against the API docs.
tuner_results.results

### Grid Search

In [None]:
######### START HPO RUN With Grid Search ##########

# Explicit candidate values per hyperparameter (two options each).
grid_search_space = {
    "n_estimators": [50, 51],
    "max_depth": [4, 5],
    "learning_rate": [0.01, 0.03],
}

# Maximize the "accuracy" metric that train_func reports.
grid_tuner_config = tune.TunerConfig(
    metric="accuracy",
    mode="max",
    search_alg=GridSearch(),
    max_concurrent_trials=2,  # (Optional) Maximum number of trials to run concurrently. If not set, defaults to the number of nodes in the cluster.
    resource_per_trial={"CPU": 1},  # (Optional) Pre-configured for reliability; modification is rarely necessary.
)

tuner = tune.Tuner(
    train_func=train_func,
    search_space=grid_search_space,
    tuner_config=grid_tuner_config,
)

# Note: this rebinds `tuner_results`, replacing the results of the previous run.
tuner_results = tuner.run(dataset_map=dataset_map)

In [None]:
# In this example, each parameter has 2 possible values, so the total number of unique combinations is 2 × 2 × 2 = 8.
# `tuner_results` now refers to the grid-search run (earlier results were overwritten).
# NOTE(review): the output is expected to list one trial per combination -- verify that 8 entries appear.
tuner_results.results