In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Experiments: Custom training autologging - Local script

<table align="left">

  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/experiments/get_started_with_custom_training_autologging_local_script.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Fexperiments%2Fget_started_with_custom_training_autologging_local_script.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/experiments/get_started_with_custom_training_autologging_local_script.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br>
      Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/experiments/get_started_with_custom_training_autologging_local_script.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br>
      View on GitHub
    </a>
  </td>
</table>
<br/>

**_NOTE_**: This notebook has been tested in the following environment:

* Python version = 3.9

## Overview

As a Data Scientist experimenting with large models, you need a way to run experiments on a scalable training service to log parameters and metrics. This guarantees reproducibility.

With Vertex AI training and Vertex AI Experiments autologging integration, you can run your ML experiments at scale and autolog their parameters and metrics
with the `enable_autolog` argument.

Learn more about [Vertex AI Experiments](https://cloud.google.com/vertex-ai/docs/experiments/intro-vertex-ai-experiments)
and how to [Autolog data to an experiment run](https://cloud.google.com/vertex-ai/docs/experiments/autolog-data).

### Objective

In this tutorial, you learn how to autolog parameters and metrics of an ML experiment running on Vertex AI Training by leveraging the integration with Vertex AI Experiments.

This tutorial uses the following Google Cloud ML services and resources:

- Vertex AI Experiments
- Vertex AI Training

The steps performed include:

- Formalize model experiment in a script
- Run model training using a local script on Vertex AI Training
- Check out ML experiment parameters and metrics in Vertex AI Experiments

### Dataset

The [Glass Identification dataset](https://archive-beta.ics.uci.edu/dataset/42/glass+identification) is a dataset from USA Forensic Science Service with 6 types of glass defined in terms of their oxide content (for example, Na, Fe, K). The goal is to classify the types of glass based on oxide features.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing),
and [Cloud Storage pricing](https://cloud.google.com/storage/pricing),
and use the [Pricing Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

## Get started

### Install Vertex AI SDK for Python and other required packages


In [None]:
# Install the packages
# USER is interpolated into the pip command below; it is empty here, so no extra
# flag is passed. NOTE(review): presumably a slot for `--user` -- confirm.
USER = ""
# Upgrade the Vertex AI SDK quietly and without dependency-conflict warnings.
! pip3 install {USER} --upgrade google-cloud-aiplatform --quiet --no-warn-conflicts

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# Only Colab needs this step; in any other environment the cell does nothing.
if "google.colab" in sys.modules:

    import IPython

    # Shut the running kernel down, asking for a restart (the True argument)
    # so the packages installed above are picked up.
    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
import sys

# Only Colab needs this step; in any other environment the cell does nothing.
if "google.colab" in sys.modules:

    from google.colab import auth

    # Authenticate the Colab user for the rest of the notebook.
    auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK for Python

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Replace the placeholder with your own project ID before running later cells.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
# Region used for the SDK, the bucket, and the training container registry prefix.
LOCATION = "us-central1"  # @param {type:"string"}

### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.


In [None]:
# Bucket that holds the dataset copy, the saved model, and job staging files.
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

**If your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
# Create the bucket in LOCATION under PROJECT_ID (skip if it already exists).
! gsutil mb -l $LOCATION -p $PROJECT_ID $BUCKET_URI

### Initialize Vertex AI SDK for Python

In [None]:
from google.cloud import aiplatform as vertex_ai

# Point the SDK at your project and region, with BUCKET_URI as the staging bucket.
vertex_ai.init(project=PROJECT_ID, location=LOCATION, staging_bucket=BUCKET_URI)

### UUID
If you’re in a live tutorial session, you may be using a shared test account or project. To avoid name collisions between users on the resources created, generate a short unique identifier (referred to as a UUID in this tutorial) for each session, and append it to the names of the resources you create in this tutorial.

In [None]:
import random
import string


def generate_uuid():
    """Return a random 8-character string of lowercase letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=8)
    return "".join(picked)


# Session-wide suffix appended to resource names to keep them unique.
UUID = generate_uuid()

### Set Service Account

**If you don't know your service account**, try to get your service account using the `gcloud` command by executing the second cell below.

*Note:* The code for automatically finding your service account works on a user-managed Vertex AI Workbench notebook.
If you are using a fully-managed notebook, you'll need to manually enter your service account.

In [None]:
# Leave the placeholder (or an empty string) to have the next cell look it up.
SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

In [None]:
IS_COLAB = False
if "google.colab" in sys.modules:
    IS_COLAB = True


if (
    SERVICE_ACCOUNT == ""
    or SERVICE_ACCOUNT is None
    or SERVICE_ACCOUNT == "[your-service-account]"
):
    # Get your service account from gcloud
    if not IS_COLAB:
        shell_output = !gcloud auth list 2>/dev/null
        SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()

    if IS_COLAB:
        shell_output = ! gcloud projects describe  $PROJECT_ID
        project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
        SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"

    print("Service Account:", SERVICE_ACCOUNT)

#### Set service account access for Vertex AI Training

Run the following commands to grant your service account permission to create and view objects in your Cloud Storage bucket, which the custom training job uses to read the dataset and write the trained model -- you only need to run these once per service account. Check out the [documentation](https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training#create_a_service_account_with_required_permissions) to get more information about the permissions a training service account needs.

In [None]:
# Let the service account create objects in the bucket.
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI

# Let the service account read objects in the bucket.
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

### Set up project template
Set the folder you use in this tutorial.

In [None]:
import os

# Local working folder for this tutorial, placed under the current directory.
TUTORIAL_DIR = os.path.join(
    os.getcwd(), "custom_training_autologging_local_script_tutorial"
)
# Create it if missing; exist_ok makes re-running this cell safe.
os.makedirs(TUTORIAL_DIR, exist_ok=True)

### Get dataset
Get the glass classification dataset from the public Cloud Storage bucket.

In [None]:
# Public location of the preprocessed glass dataset.
SOURCE_DATA_URL = "gs://cloud-samples-data/vertex-ai/dataset-management/datasets/uci_glass_preprocessed/glass.csv"
# Where the copy lives inside your own bucket.
DESTINATION_DATA_URL = f"{BUCKET_URI}/data/glass.csv"

# Copy the CSV into your bucket.
! gsutil cp $SOURCE_DATA_URL $DESTINATION_DATA_URL

### Import libraries

In [None]:
import os

### Define constants

In [None]:
# Experiment and job names, suffixed with UUID to avoid collisions
EXPERIMENT_NAME = f"glass-classification-{UUID}"
JOB_DISPLAY_NAME = f"sklearn-autologged-custom-job-{UUID}"

# Training code and container image
TRAIN_SCRIPT_PATH = os.path.join(TUTORIAL_DIR, "task.py")
PRE_BUILT_TRAINING_CONTAINER_IMAGE_URI = f"{LOCATION.split('-')[0]}-docker.pkg.dev/vertex-ai/training/tf-cpu.2-12.py310:latest"

# Compute resources
REPLICA_COUNT = 1
TRAIN_MACHINE_TYPE = "n1-standard-4"

# Cloud Storage locations
TRAINING_JOBS_URI = f"{BUCKET_URI}/jobs"
MODEL_FILE_URI = f"{BUCKET_URI}/models/model.joblib"

# The data and model locations rewritten as /gcs/ paths, which is the form the
# training script reads from and writes to
DESTINATION_DATA_PATH = DESTINATION_DATA_URL.replace("gs://", "/gcs/")
MODEL_FILE_PATH = MODEL_FILE_URI.replace("gs://", "/gcs/")

### Create an experiment for tracking training parameters and metrics

To start, initiate an experiment using the `init()` method.

In [None]:
# Initialize the SDK again, this time naming the experiment that the training
# run is tracked under.
vertex_ai.init(
    project=PROJECT_ID,
    location=LOCATION,
    staging_bucket=BUCKET_URI,
    experiment=EXPERIMENT_NAME,
)

### Train a scikit-learn model with a prebuilt container

Then, you train a custom scikit-learn model using a prebuilt training container; scikit-learn and pandas are installed into it as additional requirements.


#### Create scikit-learn training script

In [None]:
# Source of the training script. This is an f-string: DESTINATION_DATA_PATH and
# MODEL_FILE_PATH are substituted here, so the generated file has them hard-coded.
# The trailing backslash keeps the shebang on the first line of the file.
task_script = f"""\
#!/usr/bin/env python3

'''
A simple module to train a classifier on the glass dataset.
'''

# Libraries
import argparse
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib


# Variables
DATA_PATH = '{DESTINATION_DATA_PATH}'
MODEL_PATH = '{MODEL_FILE_PATH}'
TEST_SIZE = 0.2
SEED = 8

# Helpers
def read_data(path):
    '''Load the CSV at path into a DataFrame.'''
    df = pd.read_csv(path)
    return df


def split_data(df):
    '''Pop the glass_type label and return a seeded train/test split.'''
    y = df.pop('glass_type')
    X = df
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=SEED)
    return X_train, X_test, y_train, y_test


def train_model(X_train, y_train):
    '''Fit a small random forest; seeded so repeated runs are comparable.'''
    model = RandomForestClassifier(n_estimators=5, random_state=SEED)
    model.fit(X_train, y_train)
    return model


def evaluate_model(model, X_test, y_test):
    '''Return the accuracy of model on the held-out split.'''
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy


def save_model(model, path):
    '''Write model to path with joblib, creating parent folders if needed.'''
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(model, path)


def main():

    # Read data
    df = read_data(DATA_PATH)

    # Split data
    X_train, X_test, y_train, y_test = split_data(df)

    # Train model
    model = train_model(X_train, y_train)

    # Evaluate model
    accuracy = evaluate_model(model, X_test, y_test)
    print('Model accuracy:', accuracy)

    # Save model
    save_model(model, MODEL_PATH)



if __name__ == '__main__':

    # Run main
    main()
"""

# Write the script to disk; the with-block closes the file on exit.
with open(TRAIN_SCRIPT_PATH, "w") as train_file:
    train_file.write(task_script)

#### Define custom training job

Define a custom job with the prebuilt container image for training code packaged as Python script. In this case, you set `enable_autolog=True` to automatically track parameters and metrics after the training job completes. 

In [None]:
# Build a custom job from the local training script written above.
job = vertex_ai.CustomJob.from_local_script(
    project=PROJECT_ID,
    staging_bucket=TRAINING_JOBS_URI,
    display_name=JOB_DISPLAY_NAME,
    script_path=TRAIN_SCRIPT_PATH,
    container_uri=PRE_BUILT_TRAINING_CONTAINER_IMAGE_URI,
    # Extra packages the script imports, installed on top of the container image.
    requirements=["pandas", "scikit-learn"],
    replica_count=REPLICA_COUNT,
    machine_type=TRAIN_MACHINE_TYPE,
    # Turn on automatic tracking of parameters and metrics for this job.
    enable_autolog=True,
)

### Run custom training job
Next, you run the training job using the method `run`.

In [None]:
# Run the job under the experiment, using the service account resolved earlier.
job.run(experiment=EXPERIMENT_NAME, service_account=SERVICE_ACCOUNT)

### Get your autologged experiment

After you train your model, you can get parameters and metrics of the autologged experiment.

In [None]:
# Fetch what was logged under the experiment as a DataFrame.
experiment_df = vertex_ai.get_experiment_df(experiment=EXPERIMENT_NAME)
# Display it transposed.
experiment_df.T

You can also get the custom training job metadata associated with the experiment you ran. Use the `job_spec` property of the `CustomJob` class to print custom job metadata such as the training Python package, training resources, and more.


In [None]:
# Display the specification of the custom job defined above.
job.job_spec

## Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial.

In [None]:
import os

# delete flags
# Set a flag to True to delete that resource; both default to keeping it.
delete_experiment = False
delete_bucket = False

# Delete experiment
# Also runs whenever the IS_TESTING environment variable is set.
if delete_experiment or os.getenv("IS_TESTING"):
    experiment = vertex_ai.Experiment.get(experiment_name=EXPERIMENT_NAME)
    experiment.delete(delete_backing_tensorboard_runs=True)

# Delete Cloud Storage objects that were created
# Recursive removal of BUCKET_URI; also runs whenever IS_TESTING is set.
if delete_bucket or os.getenv("IS_TESTING"):
    ! gsutil -m rm -r $BUCKET_URI