In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI SDK 2.0 Vertex AI Remote Hyperparameter Tuning for OSS ML frameworks

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/training/sdk2_remote_hyperparameter_tuning.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/training/sdk2_remote_hyperparameter_tuning.ipynb">
        <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
    <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/training/sdk2_remote_hyperparameter_tuning.ipynb">
       <img src="https://www.gstatic.com/cloud/images/navigation/vertex-ai.svg" alt="Vertex AI logo">Open in Vertex AI Workbench
    </a>
  </td>
</table>

## Overview

This tutorial demonstrates how to use Vertex AI SDK 2.0 to tune the hyperparameters of OSS ML framework models, running the tuning trials either remotely on Vertex AI or locally.

### Objective

In this tutorial, you learn to use `Vertex AI SDK 2.0` to tune the hyperparameters of models from various ML frameworks, running the trials either remotely on Vertex AI or locally (on-prem).

This tutorial uses the following Google Cloud ML services:

- `Vertex AI Training`
- `Vertex AI Remote Hyperparameter Tuning`

The steps performed include:

- Download and split the dataset
- Perform feature transformations as a Vertex AI remote training job.
- For scikit-learn, PyTorch, TensorFlow, PyTorch Lightning, Tabnet
    - Tune the model remotely.
    - Get the best model.

**Local tuning**

```
from google.cloud.aiplatform.private_preview import vertex_ai
from google.cloud.aiplatform.private_preview.vertex_ai import VizierHyperparameterTuner

from my_module import MyModelClass

# Set to False for local training
vertex_ai.init(remote=False)

X, y = pd.DataFrame(...), pd.DataFrame(...)

# Define a function which returns an initialized model. Parameters of this function are tunable.
def get_model_func(learning_rate: float, optimizer: str):
  # Instantiate the class
  return MyModelClass(learning_rate=learning_rate, optimizer=optimizer)

# Define the hyperparameter search space
hparam_space = [
  {
    "parameter_id": "learning_rate",
    "double_value_spec": {
      "min_value": 0.01,
      "max_value": 0.05
    }
  }, {
    "parameter_id": "optimizer",
    "categorical_value_spec": {
      "values": ["adam", "sgd"]
    }
  }
]

tuner = VizierHyperparameterTuner(
  get_model_func=get_model_func,
  max_trial_count=3,
  parallel_trial_count=2,
  hparam_space=hparam_space,
  metric_id="accuracy",
  metric_goal="MAXIMIZE",
  max_failed_trial_count=0,
)
                               
# Tune model using Vizier. Tuning and trials run locally.
# `epochs` is passed at runtime to model's fit()/train() call
# (ex: model.fit(X, y, epochs=5))
tuner.fit(X, y, epochs=5, ...)
```

*Local tuning supported ML frameworks:*
1.  scikit-learn
2.  Custom model
3.  TensorFlow
4.  PyTorch
5.  PyTorch Lightning
6.  TabNet

---

**Remote tuning**
```
from google.cloud.aiplatform.private_preview import vertex_ai
from google.cloud.aiplatform.private_preview.vertex_ai import VizierHyperparameterTuner

import my_module

# Set to True for remote training
vertex_ai.init(remote=True, project="my-project", location="my-location", staging_bucket="gs://my-bucket")

X, y = pd.DataFrame(...), pd.DataFrame(...)

# Define a function which returns an initialized model. Parameters of this function are tunable.
def get_model_func(learning_rate: float, optimizer: str):
  # Wrap the model class with `vertex_ai.remote`
  MyModelClass = vertex_ai.remote(my_module.MyModelClass)

  # Instantiate the class
  model = MyModelClass(learning_rate=learning_rate, optimizer=optimizer)

  # Optionally set remote config
  model.fit.vertex.remote_config.display_name = "MyModelClass-remote-training"
  model.fit.vertex.remote_config.staging_bucket = "gs://my-bucket"
  return model

# Define the hyperparameter search space
hparam_space = [
  {
    "parameter_id": "learning_rate",
    "double_value_spec": {
      "min_value": 0.01,
      "max_value": 0.05
    }
  }, {
    "parameter_id": "optimizer",
    "categorical_value_spec": {
      "values": ["adam", "sgd"]
    }
  }
]

tuner = VizierHyperparameterTuner(
  get_model_func=get_model_func,
  max_trial_count=3,
  parallel_trial_count=2,
  hparam_space=hparam_space,
  metric_id="accuracy",
  metric_goal="MAXIMIZE",
  max_failed_trial_count=0,
)
                               
# Tune model using Vizier. Tuning runs locally and trials run in Vertex CustomJobs.
# `epochs` is passed at runtime to model's fit()/train() call
# (ex: model.fit(X, y, epochs=5))
tuner.fit(X, y, epochs=5, ...)
```

*Remote tuning supported OSS ML frameworks:*
1.  scikit-learn
2.  Custom model
3.  TensorFlow
4.  PyTorch
5.  PyTorch Lightning
6.  TabNet


### Dataset

This tutorial uses the <a href="https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html">Iris dataset</a>, whose task is to predict the iris species.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage
pricing](https://cloud.google.com/storage/pricing), and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

## Installation

Install the following packages required to execute this notebook.

In [None]:
# Vertex AI SDK with the "preview" and "autologging" extras
! pip3 install --upgrade --quiet google-cloud-aiplatform[preview,autologging]
# PyTorch Lightning, imported later in this notebook as `lightning.pytorch`
! pip3 install --upgrade --quiet lightning
# TensorFlow is pinned to 2.12
! pip3 install --upgrade --quiet tensorflow==2.12

### Colab only: Uncomment the following cell to restart the kernel

In [None]:
# Automatically restart kernel after installs so that your environment can access the new packages
# import IPython

# app = IPython.Application.instance()
# app.kernel.do_shutdown(True)

## Before you begin

### Set your project ID

**If you don't know your project ID**, try the following:
* Run `gcloud config list`.
* Run `gcloud projects list`.
* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)

In [None]:
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Make PROJECT_ID the active project for the gcloud commands run from this notebook
! gcloud config set project {PROJECT_ID}

#### Region

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [None]:
# Passed as `location` to vertexai.init() and as the bucket location to `gsutil mb`
REGION = "us-central1"

### Authenticate your Google Cloud account

Depending on your Jupyter environment, you may have to manually authenticate. Follow the relevant instructions below.

**1. Vertex AI Workbench**
* Do nothing as you are already authenticated.

**2. Local JupyterLab instance, uncomment and run:**

In [None]:
# ! gcloud auth login

**3. Colab, uncomment and run:**

In [None]:
# from google.colab import auth
# auth.authenticate_user()

**4. Service account or other**
* See how to grant Cloud Storage permissions to your service account at https://cloud.google.com/storage/docs/gsutil/commands/iam#ch-examples.

### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.

In [None]:
# Used as the staging bucket in vertexai.init() and as the parent path of the
# per-job staging locations defined later in the notebook
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
# Create the bucket BUCKET_URI in REGION under project PROJECT_ID
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

### Import libraries and define constants

In [None]:
import pandas as pd
import torch
import vertexai
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from vertexai.preview import VertexModel
from vertexai.preview.hyperparameter_tuning import VizierHyperparameterTuner
from vertexai.preview.tabular_models import TabNetTrainer

## Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project and corresponding bucket.

In [None]:
# Bind the SDK to the project, region and staging bucket defined in the cells above
vertexai.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
)

## Prepare the dataset

Now load the Iris dataset and split the data into train, retrain and test sets.

In [None]:
dataset = load_iris()

# First split: hold out 60% of all samples as the "retrain" set.
X, X_retrain, y, y_retrain = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.6,
    random_state=42,
)
# Second split: divide the remaining samples 80/20 into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the number of rows in each split
print("Data size: ", dataset.target.shape[0])
print("X_train size: ", X_train.shape[0])
print("X_retrain size: ", X_retrain.shape[0])
print("X_test size: ", X_test.shape[0])

## Feature transformation

Next, you do feature transformations on the data using the Vertex AI remote training service.

First, you re-initialize Vertex AI to enable remote training.

In [None]:
# Switch to remote mode so the transformer calls below are executed on Vertex AI
vertexai.preview.init(remote=True)

### Execute remote job for fit_transform() on training data

Next, indicate that the `StandardScaler` class is to be executed remotely. Then set up the data transform and call the `fit_transform()` method, which is executed remotely.

In [None]:
# Display-name prefix and staging location shared by the remote transform jobs
REMOTE_JOB_NAME = "remote-scalar"
REMOTE_JOB_BUCKET = f"{BUCKET_URI}/{REMOTE_JOB_NAME}"

# Wrap classes to enable Vertex remote execution
# Don't need this step after import hook is implemented
# NOTE(review): this rebinds the global name `StandardScaler`, so re-running this
# cell passes the already-wrapped class to remote() again -- confirm that is
# harmless, or restart the kernel before re-running.
StandardScaler = vertexai.preview.remote(StandardScaler)


# Instantiate transformer
transformer = StandardScaler()

# Set training config
transformer.fit_transform.vertex.remote_config.display_name = (
    f"{REMOTE_JOB_NAME}-fit-transformer"
)
transformer.fit_transform.vertex.remote_config.staging_bucket = REMOTE_JOB_BUCKET

# Execute transformer on Vertex
# X_train is overwritten with the value returned by fit_transform()
X_train = transformer.fit_transform(X_train)

### Remote transform on test data

In [None]:
# Configure the remote transform() job, then apply the already-fitted
# transformer to the test features before any test score is calculated.
transformer.transform.vertex.remote_config.staging_bucket = REMOTE_JOB_BUCKET
transformer.transform.vertex.remote_config.display_name = (
    f"{REMOTE_JOB_NAME}-transformer"
)

X_test = transformer.transform(X_test)

### Local transform on retrain data

In [None]:
# Switch to local transformation
vertexai.preview.init(remote=False)

# Reuse the same transformer object, this time with remote mode turned off
X_retrain = transformer.transform(X_retrain)

In [None]:
# NOTE(review): this cell repeats the "Prepare the dataset" cell verbatim.
# Re-running the split rebinds X_train, X_test and X_retrain to the raw features,
# so the values produced by the transformer in the "Feature transformation"
# section are discarded before any model is tuned. Confirm whether this reset
# is intentional; if it is not, this cell can be removed.
dataset = load_iris()

X, X_retrain, y, y_retrain = train_test_split(
    dataset.data, dataset.target, test_size=0.60, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

print("Data size: ", len(dataset.target))
print("X_train size: ", len(X_train))
print("X_retrain size: ", len(X_retrain))
print("X_test size: ", len(X_test))

## scikit-learn

### Remote tuning

First, hyperparameter tune the scikit-learn model as a remote tuning job:

- Reinitialize Vertex AI for remote tuning.
- Set the hyperparameter tuning configuration.
- Invoke the hyperparameter tuning job.
    - Set LogisticRegression for the remote tuning job.
    - Invoke LogisticRegression locally which will launch the remote training job.

In [None]:
# Trial budget passed to every VizierHyperparameterTuner in this notebook
MAX_TRIAL_COUNT = 4
PARALLEL_TRIAL_COUNT = 2

# Search space: `C` is chosen from a discrete set of values
HPARAM_SPACE = [
    {"parameter_id": "C", "discrete_value_spec": {"values": [0.1, 0.5, 1.0]}}
]
# Use LogisticRegression's score() function
METRIC_ID = "custom"
# The tuner should look for the trial with the highest metric value
METRIC_GOAL = "MAXIMIZE"

In [None]:
# Display-name prefix reused by all tuning jobs in the remaining sections
REMOTE_JOB_NAME = "test-sdk2-remote-training"
# NOTE(review): REMOTE_JOB_BUCKET is reassigned here, but none of the tuning cells
# below set `remote_config.staging_bucket` from it -- confirm whether it is still needed.
REMOTE_JOB_BUCKET = "/".join([BUCKET_URI, REMOTE_JOB_NAME])

# Switch to remote mode for training
vertexai.preview.init(remote=True)


def get_model_func(C: float):
    """Return a remote-enabled LogisticRegression for one tuning trial.

    Args:
        C: Value passed to ``LogisticRegression(C=...)``; this is the
            hyperparameter listed in HPARAM_SPACE.

    Returns:
        The model instance, with the display name of its remote ``fit`` job set.
    """
    # Wrap classes to enable Vertex remote execution
    # Don't need this step after import hook is implemented
    # The wrapped class gets its own local name, so the public
    # `LogisticRegression` imported at the top of the notebook can be used
    # directly instead of reaching into sklearn's private `_logistic` module.
    RemoteLogisticRegression = vertexai.preview.remote(LogisticRegression)

    # Instantiate model. C will be tuned.
    model = RemoteLogisticRegression(C=C)

    # Set training config
    model.fit.vertex.remote_config.display_name = REMOTE_JOB_NAME + "-test-tuning"
    return model


# Configure the tuner with the trial budget, search space and metric defined above
tuner = VizierHyperparameterTuner(
    get_model_func=get_model_func,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=PARALLEL_TRIAL_COUNT,
    hparam_space=HPARAM_SPACE,
    metric_id=METRIC_ID,
    metric_goal=METRIC_GOAL,
)

# Tune model using Vizier. Tuning runs locally while trials run on Vertex.
tuner.fit(X_train, y_train)

#### Get the best model

From the hyperparameter tuning, get the best model from the trials.

In [None]:
# get_best_models() is indexed like a sequence; keep its first entry
best_model = tuner.get_best_models()[0]

#### Local evaluation

Next, evaluate the best model from the trials locally.

In [None]:
# Switch to local mode for testing
vertexai.preview.init(remote=False)

# Evaluate model's accuracy score
# score() is called on both the training split and the held-out test split
print(f"Train accuracy: {best_model.score(X_train, y_train)}")
print(f"Test accuracy: {best_model.score(X_test, y_test)}")

### Local tuning

Now, you repeat the same, but do the tuning locally.

In [None]:
# Turn remote mode off so every trial is trained locally
vertexai.preview.init(remote=False)


def get_model_func(C: float):
    """Create the LogisticRegression for one trial; ``C`` is the tuned value."""
    model = LogisticRegression(C=C)
    return model


tuner = VizierHyperparameterTuner(
    hparam_space=HPARAM_SPACE,
    metric_id=METRIC_ID,
    metric_goal=METRIC_GOAL,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=PARALLEL_TRIAL_COUNT,
    get_model_func=get_model_func,
)

# Run the Vizier study; both the tuning loop and the training are local here.
tuner.fit(X_train, y_train)

#### Local evaluation

Finally, you do a local evaluation of the model.

In [None]:
# NOTE(review): the next cell assigns best_model from tuner.get_best_models()[0]
# again, so this cell looks redundant.
best_model = tuner.get_best_models()[0]

In [None]:
# Switch to local mode for testing
vertexai.preview.init(remote=False)

# Take the first entry returned by the locally run tuner
best_model = tuner.get_best_models()[0]

# Evaluate model's accuracy score
print(f"Train accuracy: {best_model.score(X_train, y_train)}")
print(f"Test accuracy: {best_model.score(X_test, y_test)}")

## PyTorch

### Remote tuning

First, hyperparameter tune the PyTorch model as a remote tuning job:

- Reinitialize Vertex AI for remote tuning.
- Set the hyperparameter tuning configuration.
- Invoke the hyperparameter tuning job.
    - Set TorchLogisticRegression for the remote training job.
    - Invoke TorchLogisticRegression locally which will launch the remote training job.

In [None]:
# Switch to remote mode for training
vertexai.preview.init(remote=True)

# Search space for the PyTorch model. The parameter ids match the `num_epochs`
# and `lr` arguments of TorchLogisticRegression.train() rather than arguments of
# get_model_func() -- presumably the tuner forwards them to train(); confirm.
HPARAM_SPACE = [
    {
        "parameter_id": "num_epochs",
        "integer_value_spec": {"min_value": 100, "max_value": 150},
    },
    {"parameter_id": "lr", "double_value_spec": {"min_value": 0.01, "max_value": 0.05}},
]


# Define model
class TorchLogisticRegression(VertexModel, torch.nn.Module):
    """Logistic-regression classifier built from a single linear layer.

    The class mixes ``VertexModel`` into a ``torch.nn.Module`` and marks
    ``train`` with ``vertexai.preview.developer.mark.train()``.
    """

    def __init__(self, input_size: int, output_size: int):
        """Create the linear layer and the softmax applied by ``forward``.

        Args:
            input_size: Number of input features of the linear layer.
            output_size: Number of outputs (classes) of the linear layer.
        """
        torch.nn.Module.__init__(self)
        VertexModel.__init__(self)
        self.linear = torch.nn.Linear(input_size, output_size)
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax (over dim 1) of the linear layer's output for ``x``."""
        return self.softmax(self.linear(x))

    @vertexai.preview.developer.mark.train()
    def train(self, X, y, num_epochs, lr):
        """Fit the linear layer with mini-batch SGD and a cross-entropy loss.

        Note that this method shadows ``torch.nn.Module.train(mode)``.

        Args:
            X: Features; converted to a float32 tensor.
            y: Targets for ``CrossEntropyLoss``; converted to a tensor.
            num_epochs: Number of passes over the data.
            lr: Learning rate for ``torch.optim.SGD``.
        """
        features = torch.tensor(X).to(torch.float32)
        labels = torch.tensor(y)
        dataloader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(features, labels),
            batch_size=10,
            shuffle=True,
            generator=torch.Generator(device=features.device),
        )

        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(self.parameters(), lr=lr)

        for _ in range(num_epochs):
            # Distinct batch names keep the X / y arguments from being shadowed
            for batch_features, batch_labels in dataloader:
                optimizer.zero_grad()
                # CrossEntropyLoss applies log-softmax itself, so it must be
                # given the raw logits rather than the output of forward(),
                # which has already been passed through softmax.
                logits = self.linear(batch_features)
                loss = criterion(logits, batch_labels)
                loss.backward()
                optimizer.step()

    def predict(self, X):
        """Return the index of the highest-scoring class for each row of ``X``."""
        X = torch.tensor(X).to(torch.float32)
        # Gradients are not needed for inference
        with torch.no_grad():
            pred = torch.argmax(self(X), dim=1)
        return pred


def get_model_func():
    """Create a TorchLogisticRegression(4, 3) and name the job started by its train()."""
    torch_model = TorchLogisticRegression(4, 3)

    # Set training config
    torch_model.train.vertex.remote_config.display_name = (
        f"{REMOTE_JOB_NAME}-test-tuning"
    )
    return torch_model


# metric_id and metric_goal are not passed, so the tuner's own defaults apply.
tuner = VizierHyperparameterTuner(
    hparam_space=HPARAM_SPACE,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=PARALLEL_TRIAL_COUNT,
    get_model_func=get_model_func,
)

# Run the Vizier study: the tuning loop is local, the trials run on Vertex.
tuner.fit(X_train, y_train)

#### Get the best model

From the hyperparameter tuning, get the best model from the trials.

In [None]:
# get_best_models() is indexed like a sequence; keep its first entry
best_model = tuner.get_best_models()[0]

#### Local evaluation

Next, evaluate the best model from the trials locally.

In [None]:
from sklearn.metrics import accuracy_score

# Switch to local mode for testing
vertexai.preview.init(remote=False)

# Evaluate model's accuracy score
# predict() returns one argmax class index per row, which accuracy_score
# compares against the labels
print(f"Train accuracy: {accuracy_score(y_train, best_model.predict(X_train))}")
print(f"Test accuracy: {accuracy_score(y_test, best_model.predict(X_test))}")

### Local tuning

Now, you repeat the same, but do the tuning locally.

In [None]:
# Turn remote mode off so every trial is trained locally
vertexai.preview.init(remote=False)


def get_model_func():
    """Create the TorchLogisticRegression(4, 3) used for one trial."""
    torch_model = TorchLogisticRegression(4, 3)
    return torch_model


# metric_id and metric_goal are not passed, so the tuner's own defaults apply.
tuner = VizierHyperparameterTuner(
    hparam_space=HPARAM_SPACE,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=PARALLEL_TRIAL_COUNT,
    get_model_func=get_model_func,
)

# Run the Vizier study; both the tuning loop and the training are local here.
tuner.fit(X_train, y_train)

#### Local evaluation

Finally, you do a local evaluation of the model.

In [None]:
# NOTE(review): the next cell assigns best_model from tuner.get_best_models()[0]
# again, so this cell looks redundant.
best_model = tuner.get_best_models()[0]

In [None]:
# Repeated import: accuracy_score is also imported in the remote-evaluation cell above
from sklearn.metrics import accuracy_score

# Switch to local mode for testing
vertexai.preview.init(remote=False)

# Take the first entry returned by the locally run tuner
best_model = tuner.get_best_models()[0]

# Evaluate model's accuracy score
print(f"Train accuracy: {accuracy_score(y_train, best_model.predict(X_train))}")
print(f"Test accuracy: {accuracy_score(y_test, best_model.predict(X_test))}")

## TensorFlow

### Remote tuning

First, hyperparameter tune the TensorFlow model as a remote tuning job:

- Reinitialize Vertex AI for remote tuning.
- Set the hyperparameter tuning configuration.
- Invoke the hyperparameter tuning job for remote execution.

In [None]:
# Switch to remote mode for training
vertexai.preview.init(remote=True)

# Search space for the Keras model. `optimizer` is an argument of
# get_model_func(); `epochs` is not -- presumably the tuner forwards it to
# model.fit(); confirm.
HPARAM_SPACE = [
    {
        "parameter_id": "optimizer",
        "categorical_value_spec": {"values": ["adam", "sgd"]},
    },
    {"parameter_id": "epochs", "discrete_value_spec": {"values": [10, 15, 20]}},
]


def get_model_func(optimizer: str):
    """Build and compile a remote-enabled Keras model for one tuning trial.

    Args:
        optimizer: Optimizer name passed to ``model.compile()``; one of the
            values listed for "optimizer" in HPARAM_SPACE.

    Returns:
        The compiled model, with the display name of its remote ``fit`` job set.
    """
    # Wrap classes to enable Vertex remote execution
    # Don't need this step after import hook is implemented
    # NOTE(review): this rebinds `keras.Sequential` on the shared `keras` module on
    # every call, so each later call wraps the already-wrapped class and later
    # cells see the wrapped class too -- confirm this is intended.
    keras.Sequential = vertexai.preview.remote(keras.Sequential)

    # Instantiate model
    # NOTE(review): Dense(5) produces five outputs, whereas the PyTorch models in
    # this notebook use three output classes -- confirm the layer width.
    model = keras.Sequential(
        [keras.layers.Dense(5, input_shape=(4,)), keras.layers.Softmax()]
    )

    # Specify optimizer and loss function
    # NOTE(review): mean squared error is used on softmax outputs; presumably the
    # targets are class ids -- confirm this is the intended loss.
    model.compile(optimizer=optimizer, loss="mean_squared_error")

    # Set training config
    model.fit.vertex.remote_config.display_name = REMOTE_JOB_NAME + "-test-tuning"
    return model


# metric_id and metric_goal are not passed, so the tuner's own defaults apply
tuner = VizierHyperparameterTuner(
    get_model_func=get_model_func,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=PARALLEL_TRIAL_COUNT,
    hparam_space=HPARAM_SPACE,
)

# Tune model using Vizier. Tuning runs locally while trials run on Vertex.
# batch_size is fixed input that is passed to model.fit().
tuner.fit(X_train, y_train, batch_size=32)

#### Get the best model

From the hyperparameter tuning, get the best model from the trials.

In [None]:
# get_best_models() is indexed like a sequence; keep its first entry
best_model = tuner.get_best_models()[0]

#### Local evaluation

Next, evaluate the best model from the trials locally.

In [None]:
# Switch to local mode for testing
vertexai.preview.init(remote=False)

# Evaluate the model with evaluate(); the value is reported as a loss, not an accuracy
print(f"Train loss: {best_model.evaluate(X_train, y_train)}")
print(f"Test loss: {best_model.evaluate(X_test, y_test)}")

### Local tuning

Now, you repeat the same, but do the tuning locally.

In [None]:
# Turn remote mode off so every trial is trained locally
vertexai.preview.init(remote=False)


def get_model_func(optimizer: str):
    """Build and compile the Keras model for one trial; ``optimizer`` is tuned."""
    layers = [keras.layers.Dense(5, input_shape=(4,)), keras.layers.Softmax()]
    model = keras.Sequential(layers)

    # Loss is fixed; only the optimizer varies between trials
    model.compile(loss="mean_squared_error", optimizer=optimizer)
    return model


# metric_id and metric_goal are not passed, so the tuner's own defaults apply.
tuner = VizierHyperparameterTuner(
    hparam_space=HPARAM_SPACE,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=PARALLEL_TRIAL_COUNT,
    get_model_func=get_model_func,
)

# Run the Vizier study; both the tuning loop and the trials are local here.
# batch_size is a fixed input that is passed through to model.fit().
tuner.fit(X_train, y_train, batch_size=32)

#### Local evaluation

Finally, you do a local evaluation of the model.

In [None]:
# NOTE(review): the next cell assigns best_model from tuner.get_best_models()[0]
# again, so this cell looks redundant.
best_model = tuner.get_best_models()[0]

In [None]:
# Switch to local mode for testing
vertexai.preview.init(remote=False)

# Take the first entry returned by the locally run tuner
best_model = tuner.get_best_models()[0]

# Evaluate the model with evaluate(); the value is reported as a loss, not an accuracy
print(f"Train loss: {best_model.evaluate(X_train, y_train)}")
print(f"Test loss: {best_model.evaluate(X_test, y_test)}")

## PyTorch Lightning

### Remote tuning

First, hyperparameter tune the PyTorch Lightning model as a remote tuning job:

- Reinitialize Vertex AI for remote tuning.
- Set the hyperparameter tuning configuration.
- Invoke the hyperparameter tuning job for remote execution.

In [None]:
import lightning.pytorch as pl

# Switch to remote mode for training
vertexai.preview.init(remote=True)

# Search space: only the dataloader batch size is tuned
HPARAM_SPACE = [
    {
        "parameter_id": "batch_size",
        "integer_value_spec": {"min_value": 10, "max_value": 20},
    }
]

# Overrides the value of 2 set in the scikit-learn section
PARALLEL_TRIAL_COUNT = 1

# Wrap classes to enable Vertex remote execution
# Don't need this step after import hook is implemented
# NOTE(review): this rebinds `pl.Trainer` on the module, so re-running this cell
# wraps the already-wrapped class again -- confirm that is harmless.
pl.Trainer = vertexai.preview.remote(pl.Trainer)


class LitLogisticRegression(pl.LightningModule):
    """Logistic-regression classifier written as a LightningModule."""

    def __init__(self, input_size: int, output_size: int):
        """Create the linear layer and the softmax applied by ``forward``.

        Args:
            input_size: Number of input features of the linear layer.
            output_size: Number of outputs (classes) of the linear layer.
        """
        super().__init__()
        self.linear = torch.nn.Linear(input_size, output_size)
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Return the softmax (over dim 1) of the linear layer's output for ``x``."""
        return self.softmax(self.linear(x))

    def training_step(self, batch, batch_idx):
        """Return the cross-entropy loss for one ``(features, targets)`` batch."""
        x, y = batch[0], batch[1]
        # cross_entropy applies log-softmax itself, so it must be given the raw
        # logits rather than the output of forward(), which has already been
        # passed through softmax.
        logits = self.linear(x)
        loss = torch.nn.functional.cross_entropy(logits, y)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with a fixed lr of 0.05."""
        return torch.optim.Adam(self.parameters(), lr=0.05)

    def predict(self, X):
        """Return the index of the highest-scoring class for each row of ``X``."""
        X = torch.tensor(X).to(torch.float32)
        # Gradients are not needed for inference
        with torch.no_grad():
            pred = torch.argmax(self(X), dim=1)
        return pred


def get_model_func(X_train, y_train, batch_size):
    """Assemble the model, training dataloader and trainer for one trial.

    ``batch_size`` is the tuned hyperparameter. The three objects are returned
    in a dict keyed "model", "train_dataloaders" and "trainer".
    """
    model = LitLogisticRegression(4, 3)

    # Convert the training data to tensors and wrap it in a shuffling dataloader
    features = torch.tensor(X_train).to(torch.float32)
    targets = torch.tensor(y_train)
    train_dataloader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(features, targets),
        shuffle=True,
        batch_size=batch_size,
    )

    trainer = pl.Trainer(log_every_n_steps=10, accelerator="cpu", max_epochs=10)

    # Set training config
    trainer.fit.vertex.remote_config.display_name = f"{REMOTE_JOB_NAME}-test-tuning"

    return dict(model=model, train_dataloaders=train_dataloader, trainer=trainer)


# metric_id and metric_goal are not passed, so the tuner's own defaults apply
tuner = VizierHyperparameterTuner(
    get_model_func=get_model_func,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=PARALLEL_TRIAL_COUNT,
    hparam_space=HPARAM_SPACE,
)

# Tune model using Vizier. Tuning runs locally while trials run on Vertex.
tuner.fit(X_train, y_train)

#### Get the best model

From the hyperparameter tuning, get the best model from the trials.

In [None]:
# The first entry is indexed by the same "trainer" and "model" keys that
# get_model_func() returns
best_model = tuner.get_best_models()[0]
trainer = best_model["trainer"]
model = best_model["model"]

#### Local evaluation

Next, evaluate the best model from the trials locally.

In [None]:
# Switch to local mode for testing
vertexai.preview.init(remote=False)

# Evaluate model's accuracy score
# accuracy_score must already have been imported from sklearn.metrics by an earlier cell
print(f"Train accuracy: {accuracy_score(y_train, model.predict(X_train))}")
print(f"Test accuracy: {accuracy_score(y_test, model.predict(X_test))}")

### Local tuning

Now, you repeat the same, but do the tuning locally.

In [None]:
# Turn remote mode off so every trial is trained locally
vertexai.preview.init(remote=False)


def get_model_func(X_train, y_train, batch_size):
    """Assemble the model, training dataloader and trainer for one local trial.

    ``batch_size`` is the tuned hyperparameter. The three objects are returned
    in a dict keyed "model", "train_dataloaders" and "trainer".
    """
    model = LitLogisticRegression(4, 3)

    # Convert the training data to tensors and wrap it in a shuffling dataloader
    features = torch.tensor(X_train).to(torch.float32)
    targets = torch.tensor(y_train)
    train_dataloader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(features, targets),
        shuffle=True,
        batch_size=batch_size,
    )

    trainer = pl.Trainer(log_every_n_steps=10, accelerator="cpu", max_epochs=10)

    # Explicitly keep this trainer's fit() local
    trainer.fit.vertex.remote = False

    return dict(model=model, train_dataloaders=train_dataloader, trainer=trainer)


# metric_id and metric_goal are not passed, so the tuner's own defaults apply.
tuner = VizierHyperparameterTuner(
    hparam_space=HPARAM_SPACE,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=PARALLEL_TRIAL_COUNT,
    get_model_func=get_model_func,
)

# Run the Vizier study; both the tuning loop and the training are local here.
tuner.fit(X_train, y_train)

#### Local evaluation

Finally, you do a local evaluation of the model.

In [None]:
# The first entry is indexed by the same "trainer" and "model" keys that
# get_model_func() returns
best_model = tuner.get_best_models()[0]
trainer = best_model["trainer"]
model = best_model["model"]

In [None]:
# Switch to local mode for testing
vertexai.preview.init(remote=False)

# Evaluate model's accuracy score
# accuracy_score must already have been imported from sklearn.metrics by an earlier cell
print(f"Train accuracy: {accuracy_score(y_train, model.predict(X_train))}")
print(f"Test accuracy: {accuracy_score(y_test, model.predict(X_test))}")

## TabNet

### Remote tuning

First, hyperparameter tune the TabNet model as a remote tuning job:

- Reinitialize Vertex AI for remote tuning.
- Set the hyperparameter tuning configuration.
- Invoke the hyperparameter tuning job for remote execution.

In [None]:
# Switch to remote mode for training
vertexai.preview.init(remote=True)

# Restore two parallel trials (the PyTorch Lightning section set this to 1)
PARALLEL_TRIAL_COUNT = 2
# Search space: both ids are arguments of get_model_func() below
HPARAM_SPACE = [
    {
        "parameter_id": "batch_size",
        "integer_value_spec": {"min_value": 10, "max_value": 100},
    },
    {
        "parameter_id": "learning_rate",
        "double_value_spec": {"min_value": 0.01, "max_value": 0.05},
    },
]


def get_model_func(batch_size, learning_rate):
    """Create a TabNetTrainer for one trial; both arguments are tuned.

    Returns:
        The trainer, with the display name of its remote ``fit`` job set.
    """
    # Instantiate model
    trainer = TabNetTrainer(
        model_type="classification",
        target_column="target",
        learning_rate=learning_rate,
        max_steps=100,
        batch_size=batch_size,
    )

    # Set training config
    trainer.fit.vertex.remote_config.display_name = REMOTE_JOB_NAME + "-test-tuning"
    return trainer


# metric_id and metric_goal are not passed, so the tuner's own defaults apply
tuner = VizierHyperparameterTuner(
    get_model_func=get_model_func,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=PARALLEL_TRIAL_COUNT,
    hparam_space=HPARAM_SPACE,
)


# TabNet takes a single dataframe containing features and target column.
# y is the target column name.
columns = ["0", "1", "2", "3"]
training_data = pd.DataFrame(X_train, columns=columns)
training_data["target"] = y_train
training_data["target"] = training_data["target"].astype("category")

# NOTE: from here on X_train, y_train, X_test and y_test are rebound to
# DataFrames, so cells that run after this one see DataFrames under these names.
X_train = pd.DataFrame(X_train, columns=columns)
y_train = pd.DataFrame(y_train, columns=["target"])
y_train["target"] = y_train["target"].astype("category")

X_test = pd.DataFrame(X_test, columns=columns)
y_test = pd.DataFrame(y_test, columns=["target"])
y_test["target"] = y_test["target"].astype("category")

# Tune model using Vizier. Tuning runs locally while trials run on Vertex.
tuner.fit(training_data, "target", X_test, y_test)

#### Get the best model

From the hyperparameter tuning, get the best model from the trials.

In [None]:
# get_best_models() is indexed like a sequence; keep its first entry
best_model = tuner.get_best_models()[0]

#### Local evaluation

Next, evaluate the best model from the trials locally.

In [None]:
# Switch to local mode for testing
vertexai.preview.init(remote=False)

# Evaluate model's accuracy score
# accuracy_score must already have been imported from sklearn.metrics by an earlier cell
print(f"Train accuracy: {accuracy_score(y_train, best_model.predict(X_train))}")
print(f"Test accuracy: {accuracy_score(y_test, best_model.predict(X_test))}")

## Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial.

In [None]:
import os

# Set to True to delete the bucket and everything in it
delete_bucket = False

# The bucket is also removed whenever the IS_TESTING environment variable is set
if delete_bucket or os.getenv("IS_TESTING"):
    ! gsutil rm -rf {BUCKET_URI}