In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# E2E ML on GCP: MLOps stage 2 : Get started with autologging using Vertex AI Experiments for XGBoost models

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage2/get_started_with_vertex_experiments_autologging_xgboost.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage2/get_started_with_vertex_experiments_autologging_xgboost.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/ml_ops/stage2/get_started_with_vertex_experiments_autologging_xgboost.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      Open in Vertex AI Workbench
    </a>
  </td>
</table>
<br/><br/><br/>

## Overview


This tutorial demonstrates how to use the `Vertex AI Experiments` with DIY code to implement automatic logging of parameters and metrics for experiments.

### Objective

In this tutorial, you learn how to create an experiment for training an XGBoost model, and automatically log parameters and metrics using the enclosed do-it-yourself (DIY) code.

This tutorial uses the following Google Cloud ML services and resources:

- `Vertex AI Experiments`

The steps performed include:

- Construct the DIY autologging code.
- Construct training package with call to autologging.
- Train a model.
- View the experiment
- Delete the experiment.

### Dataset

The dataset used for this tutorial is the [Iris dataset](https://www.tensorflow.org/datasets/catalog/iris) from [TensorFlow Datasets](https://www.tensorflow.org/datasets/catalog/overview). This dataset does not require any feature engineering. The version of the dataset in this tutorial is stored in a public Cloud Storage bucket. The trained model predicts the type of Iris flower species from a class of three species: setosa, virginica, or versicolor.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage
pricing](https://cloud.google.com/storage/pricing), and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

### Set up your local development environment

If you are using Colab or Vertex AI Workbench notebooks, your environment already meets all the requirements to run this notebook. You can skip this step.

Otherwise, make sure your environment meets this notebook's requirements. You need the following:

- The Google Cloud SDK
- Git
- Python 3
- virtualenv
- Jupyter notebook running in a virtual environment with Python 3

The Google Cloud guide to [Setting up a Python development environment](https://cloud.google.com/python/setup) and the [Jupyter installation guide](https://jupyter.org/install) provide detailed instructions for meeting these requirements. The following steps provide a condensed set of instructions:

1. [Install and initialize the SDK](https://cloud.google.com/sdk/docs/).

2. [Install Python 3](https://cloud.google.com/python/setup#installing_python).

3. [Install virtualenv](https://cloud.google.com/python/setup#installing_and_using_virtualenv) and create a virtual environment that uses Python 3.  Activate the virtual environment.

4. To install Jupyter, run `pip3 install jupyter` on the command-line in a terminal shell.

5. To launch Jupyter, run `jupyter notebook` on the command-line in a terminal shell.

6. Open this notebook in the Jupyter Notebook Dashboard.


## Installation

Install the following packages to execute this notebook.

In [None]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

! pip3 install --upgrade --quiet {USER_FLAG} google-cloud-aiplatform \
                                             xgboost \
                                             scikit-learn \
                                             numpy

### Restart the kernel

After you install the additional packages, you need to restart the notebook kernel so it can find the packages.

In [None]:
import os

# Skip the restart when the IS_TESTING environment variable is set to a
# non-empty value (presumably an automated test run — TODO confirm).
if not os.getenv("IS_TESTING"):
    # Automatically restart kernel after installs
    import IPython

    # Grab the running IPython application so its kernel can be shut down.
    app = IPython.Application.instance()
    # The `True` argument presumably requests a restart rather than a plain
    # shutdown (matches the "restart kernel" intent above) — verify against IPython.
    app.kernel.do_shutdown(True)

## Before you begin

### GPU runtime

This tutorial does not require a GPU runtime.

### Set up your Google Cloud project

**The following steps are required, regardless of your notebook environment.**

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

2. [Make sure that billing is enabled for your project.](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [Enable the following APIs: Vertex AI APIs, Compute Engine APIs, and Cloud Storage.](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component,storage-component.googleapis.com)

4. If you are running this notebook locally, you need to install the [Cloud SDK](https://cloud.google.com/sdk).

5. Enter your project ID in the cell below. Then run the cell to make sure the
Cloud SDK uses the right project for all the commands in this notebook.

**Note**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$`.

#### Set your project ID

**If you don't know your project ID**, you may be able to get your project ID using `gcloud`.

In [None]:
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
# Fall back to the gcloud configuration only when PROJECT_ID is empty, None, or
# still the "[your-project-id]" placeholder from the cell above.
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    # The `!` prefix runs the command in a shell and captures its output lines.
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    # NOTE(review): indexing [0] raises IndexError if the command prints no
    # lines at all (e.g. gcloud missing or no default project) — confirm whether
    # a clearer error message is wanted here.
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
! gcloud config set project $PROJECT_ID

#### Region

You can also change the `REGION` variable, which is used for operations
throughout the rest of this notebook.  Below are regions supported for Vertex AI. We recommend that you choose the region closest to you.

- Americas: `us-central1`
- Europe: `europe-west4`
- Asia Pacific: `asia-east1`

You may not use a multi-regional bucket for training with Vertex AI. Not all regions provide support for all Vertex AI services.

Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations)

In [None]:
REGION = "[your-region]"  # @param {type: "string"}

if REGION == "[your-region]":
    REGION = "us-central1"

#### UUID

If you are in a live tutorial session, you might be using a shared test account or project. To avoid name collisions between users on resources created, you create a uuid for each instance session, and append it onto the name of resources you create in this tutorial.

In [None]:
import random
import string


# Build a random identifier of the requested length (default=8)
def generate_uuid(length: int = 8) -> str:
    """Return `length` characters drawn at random from lowercase letters and digits.

    Args:
        length: Number of characters in the identifier.

    Returns:
        The random identifier as a string.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)


UUID = generate_uuid()

### Authenticate your Google Cloud account

**If you are using Vertex AI Workbench Notebooks**, your environment is already authenticated. 

**If you are using Colab**, run the cell below and follow the instructions when prompted to authenticate your account via OAuth.

**Otherwise**, follow these steps:

In the Cloud Console, go to the [Create service account key](https://console.cloud.google.com/apis/credentials/serviceaccountkey) page.

**Click Create service account**.

In the **Service account name** field, enter a name, and click **Create**.

In the **Grant this service account access to project** section, click the Role drop-down list. Type "Vertex" into the filter box, and select **Vertex Administrator**. Type "Storage Object Admin" into the filter box, and select **Storage Object Admin**.

Click Create. A JSON file that contains your key downloads to your local environment.

Enter the path to your service account key as the GOOGLE_APPLICATION_CREDENTIALS variable in the cell below and run the cell.

In [None]:
# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your GCP account. This provides access to your
# Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

import os
import sys

# If on Vertex AI Workbench, then don't execute this code
# IS_COLAB records whether the `google.colab` module is already loaded; the
# branch below repeats the same check instead of reading this variable.
IS_COLAB = "google.colab" in sys.modules
# Workbench is detected by either the env_version metadata file or the
# DL_ANACONDA_HOME environment variable; authentication is skipped if either exists.
if not os.path.exists("/opt/deeplearning/metadata/env_version") and not os.getenv(
    "DL_ANACONDA_HOME"
):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        # Start the interactive Colab authentication flow.
        google_auth.authenticate_user()

    # If you are running this notebook locally, replace the string below with the
    # path to your service account key and run this cell to authenticate your GCP
    # account.
    elif not os.getenv("IS_TESTING"):
        # NOTE(review): the '' below is a placeholder that must be replaced with
        # the key path before this line does anything useful.
        %env GOOGLE_APPLICATION_CREDENTIALS ''

### Import libraries and define constants

In [None]:
import google.cloud.aiplatform as aiplatform
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score

# to suppress lint message (unused)
precision_score, recall_score

## Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project and region.

In [None]:
aiplatform.init(project=PROJECT_ID, location=REGION)

## DIY code for autologging XGBoost models

The code below implements autologging for XGBoost models.

- `autolog()`: Initializes the experiment and uses heap injection to replace the `xgboost.train()` symbol on the heap with the redirect wrapper function `VertexXGBtrain`.

- `VertexXGBtrain`: A wrapper function for the XGBoost `train()` function. Automatically logs hyperparameters and calls the underlying function.

- `VertexSKLaccuracy_score`: A wrapper function for scikit-learn accuracy_score() function. Automatically calls underlying function and logs the metrics results.

In [None]:
def autolog(
    project: str = None,
    location: str = None,
    staging_bucket: str = None,
    experiment: str = None,
    run: str = None,
    framework: str = "tf",
):
    """
    Enable automatic logging of parameters and metrics in Vertex AI Experiments,
    for corresponding framework.

    The function monkey-patches the training/metric entry points of the chosen
    framework with the `Vertex*` wrappers defined in this notebook, then
    (optionally) initializes the SDK, the experiment and the run.

    It is safe to call more than once (e.g. when a notebook cell is re-run):
    a function that is already one of the wrappers is never recorded as the
    "real" implementation, which would otherwise make the wrapper call itself
    recursively.

    Args:
        project: The project ID
        location : The region
        staging_bucket: temporary bucket
        experiment: The name of the experiment
        run: The name of the run within the experiment
        framework: The ML framework for which a model is being trained
            ("tf" or "xgb"). Any other value skips the patching step.
    """
    import logging

    namespace = globals()

    def _is_wrapper(func, wrapper) -> bool:
        # Compare by name as well as identity: re-running the cell that defines
        # the wrappers creates new function objects, while the patched module
        # still holds the previous (equally named) wrapper object.
        return func is wrapper or getattr(func, "__name__", None) == wrapper.__name__

    # autologging
    if framework == "tf":
        for symbol, wrapper_name in (
            ("Sequential", "VertexTFSequential"),
            ("Model", "VertexTFModel"),
        ):
            wrapper = namespace.get(wrapper_name)
            if wrapper is None:
                # The TensorFlow wrappers are optional; report instead of
                # silently doing nothing.
                logging.warning(
                    "autolog: %s is not defined; skipping %s", wrapper_name, symbol
                )
                continue
            namespace[symbol] = wrapper
            for alias in ("tf", "tensorflow"):
                if alias not in namespace:
                    continue
                try:
                    setattr(namespace[alias].keras, symbol, wrapper)
                except AttributeError as err:
                    logging.warning(
                        "autolog: could not patch %s.keras.%s: %s", alias, symbol, err
                    )
    elif framework == "xgb":
        import sklearn.metrics

        # `xgboost` and `xgb` are usually two names for the same module, so the
        # second alias would see the wrapper installed through the first one.
        for alias in ("xgboost", "xgb"):
            current = getattr(namespace.get(alias), "train", None)
            if current is None:
                continue
            if not _is_wrapper(current, VertexXGBtrain):
                # Read by VertexXGBtrain through its `global real_xgb_train`.
                namespace["real_xgb_train"] = current
            namespace[alias].train = VertexXGBtrain

        # Only metrics that were imported into the notebook namespace are
        # redirected. The originals are stored as real_accuracy_score,
        # real_precision_score and real_recall_score for the wrappers to call.
        metric_wrappers = {
            "accuracy_score": VertexSKLaccuracy_score,
            "precision_score": VertexSKLprecision_score,
            "recall_score": VertexSKLrecall_score,
        }
        for name, wrapper in metric_wrappers.items():
            if name not in namespace:
                continue
            current = getattr(sklearn.metrics, name)
            if not _is_wrapper(current, wrapper):
                namespace["real_" + name] = current
            setattr(sklearn.metrics, name, wrapper)
            namespace[name] = wrapper

    if project:
        aiplatform.init(
            project=project, location=location, staging_bucket=staging_bucket
        )

    if experiment:
        aiplatform.init(experiment=experiment)
    if run:
        aiplatform.start_run(run)


def VertexXGBtrain(
    params,
    dtrain,
    num_boost_round=10,
    evals=None,
    obj=None,
    maximize=None,
    early_stopping_rounds=None,
    evals_result=None,
    verbose_eval=True,
    callbacks=None,
    custom_metric=None,
):
    """
    Wrapper function for autologging training parameters with Vertex AI Experiments

    Logs `num_boost_round` and a fixed set of booster hyperparameters (when
    present in `params`) under a "train." prefix, then delegates to the real
    training function stored in the global `real_xgb_train`.

    Args:
        same as underlying xgb.train() method
    Returns:
        Whatever the underlying xgb.train() call returns.
    """
    global real_xgb_train

    # Each hyperparameter is logged with its natural type: `booster` is a name
    # such as "gbtree" (int() would raise), and eta/gamma/alpha are fractional
    # values (int() would truncate e.g. 0.3 to 0).
    casts = {
        "booster": str,
        "eta": float,
        "max_depth": int,
        "max_leaf_nodes": int,
        "gamma": float,
        "alpha": float,
    }

    logged = {"train.num_boost_round": int(num_boost_round)}
    if params:
        for name, cast in casts.items():
            # Unset (None) values are skipped rather than cast.
            if name in params and params[name] is not None:
                logged[f"train.{name}"] = cast(params[name])

    aiplatform.log_params(logged)

    return real_xgb_train(
        params=params,
        dtrain=dtrain,
        num_boost_round=num_boost_round,
        evals=evals,
        obj=obj,
        maximize=maximize,
        early_stopping_rounds=early_stopping_rounds,
        evals_result=evals_result,
        verbose_eval=verbose_eval,
        callbacks=callbacks,
        custom_metric=custom_metric,
    )


def VertexSKLaccuracy_score(labels, predictions, *, normalize=True, sample_weight=None):
    """
    Wrapper function for autologging training metrics with Vertex AI Experiments

    The wrapper replaces `sklearn.metrics.accuracy_score`, so it also accepts
    that function's keyword-only options and forwards them unchanged.

    Args:
        labels: Ground-truth labels.
        predictions: Predicted labels.
        normalize: Forwarded to the underlying accuracy_score function.
        sample_weight: Forwarded to the underlying accuracy_score function.
    Returns:
        The value returned by the underlying accuracy_score function.
    """
    global real_accuracy_score
    accuracy = real_accuracy_score(
        labels, predictions, normalize=normalize, sample_weight=sample_weight
    )
    # Log a plain Python number: NumPy scalars expose `.item()` for that.
    loggable = accuracy.item() if hasattr(accuracy, "item") else accuracy
    aiplatform.log_metrics({"accuracy": loggable})
    return accuracy


def VertexSKLprecision_score(
    y_true,
    y_pred,
    *,
    labels=None,
    pos_label=1,
    average="binary",
    sample_weight=None,
    zero_division="warn",
):
    """
    Compute precision with the original scikit-learn function and log it to
    Vertex AI Experiments under the key "precision".

    Args:
        same as underlying precision_score function
    Returns:
        The value returned by the underlying precision_score function.
    """
    global real_precision_score

    # Collect the keyword-only options once and forward them unchanged.
    options = dict(
        labels=labels,
        pos_label=pos_label,
        average=average,
        sample_weight=sample_weight,
        zero_division=zero_division,
    )
    score = real_precision_score(y_true, y_pred, **options)
    aiplatform.log_metrics({"precision": score})
    return score


def VertexSKLrecall_score(
    y_true,
    y_pred,
    *,
    labels=None,
    pos_label=1,
    average="binary",
    sample_weight=None,
    zero_division="warn",
):
    """
    Compute recall with the original scikit-learn function and log it to
    Vertex AI Experiments under the key "recall".

    Args:
        same as underlying recall_score function
    Returns:
        The value returned by the underlying recall_score function.
    """
    global real_recall_score

    # Collect the keyword-only options once and forward them unchanged.
    options = dict(
        labels=labels,
        pos_label=pos_label,
        average=average,
        sample_weight=sample_weight,
        zero_division=zero_division,
    )
    score = real_recall_score(y_true, y_pred, **options)
    aiplatform.log_metrics({"recall": score})
    return score


class VertexXGBBooster(xgb.Booster):
    """
    WIP: pass-through subclass of `xgb.Booster`.

    Every method below forwards its arguments unchanged to the parent class;
    no Vertex AI Experiments logging is performed here. The class is not
    referenced by any other cell visible in this notebook.

    NOTE(review): arguments are forwarded positionally, so each override
    assumes the installed `xgb.Booster` method takes the same parameters in the
    same order — confirm against the xgboost version in use.
    """

    def __init__(self, params=None, cache=None, model_file=None):
        # Delegate construction entirely to xgb.Booster.
        super().__init__(params, cache, model_file)

    def boost(
        self, dtrain: xgb.core.DMatrix, grad: np.ndarray, hess: np.ndarray
    ) -> None:
        # Pass-through to xgb.Booster.boost.
        return super().boost(dtrain, grad, hess)

    def eval(
        self, data: xgb.core.DMatrix, name: str = "eval", iteration: int = 0
    ) -> str:
        # Pass-through to xgb.Booster.eval.
        return super().eval(data, name, iteration)

    def update(self, dtrain: xgb.core.DMatrix, iteration: int, fobj=None) -> None:
        # Pass-through to xgb.Booster.update.
        return super().update(dtrain, iteration, fobj)

### Train the XGBoost model with Vertex AI Experiments

In the following code, you build, train and evaluate an XGBoost tabular model. The Python script includes the following calls to integrate `Vertex AI Experiments`:

- command-line arguments: The arguments `experiment` and `run` are used to pass in the experiment and run names for the experiment.
- `autolog()`: Initializes the experiment and does the heap injection.
- `aiplatform.start_execution()`: Initializes a context for linking artifacts.
- `aiplatform.end_run()`: Ends the experiment.

*Note:* The functions `xgb.train` and `accuracy_score` will be redirected to `VertexXGBtrain` and `VertexSKLaccuracy_score`, respectively, by heap injection. When subsequent calls are made to the `train()` and `accuracy_score()` functions, they will be executed as the corresponding `VertexXGBtrain` and `VertexSKLaccuracy_score` functions.

In [None]:
EXPERIMENT_NAME = f"myexperiment{UUID}"
RUN_NAME = "run-1"

DATASET_DIR = "gs://cloud-samples-data/ai-platform/iris"
DATASET_DATA_URL = DATASET_DIR + "/iris_data.csv"
DATASET_LABELS_URL = DATASET_DIR + "/iris_target.csv"

BOOSTED_ROUNDS = 20

import logging
import os
import subprocess
import sys

import hypertune
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split


def get_data():
    """Download the Iris CSV files and prepare the train/test data.

    Copies the data and label files from Cloud Storage with `gsutil`, loads
    them as NumPy arrays, and holds out 20% of the rows for testing.

    Returns:
        A tuple `(dtrain, test_data, test_labels)` where `dtrain` is an
        `xgb.DMatrix` built from the training split.
    """
    downloads = (
        (DATASET_DATA_URL, "data.csv"),
        (DATASET_LABELS_URL, "labels.csv"),
    )
    for source_url, local_name in downloads:
        # gsutil outputs everything to stderr so we need to divert it to stdout.
        subprocess.check_call(
            ["gsutil", "cp", source_url, local_name], stderr=sys.stdout
        )

    # `.values` turns each loaded frame into a NumPy array.
    features = pd.read_csv("data.csv").values
    targets = pd.read_csv("labels.csv").values

    # Flatten the one-column 2D label array into the 1D shape XGBoost expects.
    targets = targets.reshape((targets.size,))

    split = train_test_split(features, targets, test_size=0.2, random_state=7)
    train_data, test_data, train_labels, test_labels = split

    # Only the training split is wrapped in a DMatrix here.
    return xgb.DMatrix(train_data, label=train_labels), test_data, test_labels


def train_model(dtrain):
    """Train a 3-class XGBoost model on `dtrain` for BOOSTED_ROUNDS rounds.

    Args:
        dtrain: Training data passed straight to `xgb.train`.

    Returns:
        The object returned by `xgb.train`.
    """
    logging.info("Start training ...")
    booster = xgb.train(
        params={"max_depth": 3, "objective": "multi:softmax", "num_class": 3},
        dtrain=dtrain,
        num_boost_round=BOOSTED_ROUNDS,
    )
    logging.info("Training completed")
    return booster


def evaluate_model(model, test_data, test_labels):
    """Score `model` on the held-out data and report the accuracy.

    Args:
        model: Trained model exposing `predict`.
        test_data: Test features, wrapped in an `xgb.DMatrix` before predicting.
        test_labels: Ground-truth labels for `test_data`.

    Returns:
        The accuracy computed by `accuracy_score`.
    """
    raw_scores = model.predict(xgb.DMatrix(test_data))
    # Round each prediction to the nearest integer class label.
    predictions = list(map(round, raw_scores))
    accuracy = accuracy_score(test_labels, predictions)

    logging.info(f"Evaluation completed with model accuracy: {accuracy}")

    # report metric for hyperparameter tuning
    hypertune.HyperTune().report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag="accuracy", metric_value=accuracy
    )
    return accuracy


# autologging
# Patch xgb.train / accuracy_score with the Vertex wrappers and start the run.
# NOTE(review): RUN_NAME is a fixed string, so re-running this cell asks for a
# run with the same name again — confirm how start_run handles that.
autolog(experiment=EXPERIMENT_NAME, run=RUN_NAME, framework="xgb")

# Data loading, training and evaluation all happen inside the execution context.
with aiplatform.start_execution(
    schema_title="system.ContainerExecution", display_name="example_training"
) as execution:
    dtrain, test_data, test_labels = get_data()
    model = train_model(dtrain)
    accuracy = evaluate_model(model, test_data, test_labels)

# Close the run started by autolog(). This line is not reached if any step
# above raises.
aiplatform.end_run()

#### Get the experiment results

Next, you use the experiment name as a parameter to the method `get_experiment_df()` to get the results of the experiment as a pandas dataframe.

In [None]:
# Request this tutorial's experiment explicitly by name so the cell does not
# depend on which experiment the SDK currently has active.
experiment_df = aiplatform.get_experiment_df(EXPERIMENT_NAME)
# Keep only the rows that belong to this experiment.
experiment_df = experiment_df[experiment_df.experiment_name == EXPERIMENT_NAME]
# Transpose so each run is displayed as a column of parameters and metrics.
experiment_df.T

#### Delete the experiment

Since the experiment was created within a training script, to delete the experiment you use the `list()` method to obtain all the experiments for the project, and then filter on the experiment name.

In [None]:
# List the experiments visible to the initialized SDK and delete the one (or
# ones) whose name matches this tutorial's experiment.
experiments = aiplatform.Experiment.list()
for experiment in experiments:
    if experiment.name == EXPERIMENT_NAME:
        experiment.delete()

# Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial.

In [None]:
# There are no resources to cleanup