In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Model Monitoring for Model Outside Vertex AI

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/model_monitoring_v2/model_monitoring_for_model_outside_vertex.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Fmodel_monitoring_v2%2Fmodel_monitoring_for_model_outside_vertex.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/model_monitoring_v2/model_monitoring_for_model_outside_vertex.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/model_monitoring_v2/model_monitoring_for_model_outside_vertex.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>


## Overview

This tutorial demonstrates how to use the Vertex AI SDK for Python to set up Vertex AI Model Monitoring V2 for your model. Model Monitoring V2 now supports models outside of Vertex AI (allowing you to register a referenced/placeholder model in Vertex AI without artifacts). This capability can also extend to feature store monitoring.

### Objective

The steps performed include the following:

- Register a referenced/placeholder model for a model outside Vertex AI
- Create a Model Monitor
- Run an on-demand model monitoring job
- Schedule continuous model monitoring


### Costs

Vertex AI Model Monitoring v2 is free during the public preview period, but you will still be billed for the following Google Cloud services:

* [BigQuery](https://cloud.google.com/bigquery/pricing)
* [Cloud Storage](https://cloud.google.com/storage/pricing)

## Getting Started

### Install Vertex AI SDK and other required packages

In [None]:
! pip3 install --upgrade --quiet \
    google-cloud-bigquery \
    pandas \
    pandas_gbq \
    pyarrow \
    tensorflow_data_validation[visualization] \
    google-cloud-aiplatform

Check that the version of google-cloud-aiplatform is 1.51.0 or later.

In [None]:
import google.cloud.aiplatform as aiplatform

# The bare expression is rendered as the cell output so the installed SDK
# version can be compared against the minimum stated above.
aiplatform.__version__

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# Only on Google Colab: restart the kernel so that the newly installed
# packages are picked up.
if "google.colab" in sys.modules:
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Please wait until it is finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.

In [None]:
import sys

# The interactive sign-in is only performed when running on Google Colab.
if "google.colab" in sys.modules:
    from google.colab import auth as colab_auth

    colab_auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}


import os

import vertexai

! gcloud config set project $PROJECT_ID
os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID
! gcloud config set ai/region $LOCATION

vertexai.init(project=PROJECT_ID, location=LOCATION)

## Start Model Monitoring tutorial

### Step 1: Create a Cloud Storage bucket

Create a Cloud Storage bucket to store intermediate artifacts such as datasets.

In [None]:
# Cloud Storage URI of the bucket used for this tutorial's intermediate
# artifacts. This cell only defines the URI; it does not create the bucket.
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
# Make the bucket BUCKET_URI in LOCATION under PROJECT_ID.
# Skip this cell if the bucket already exists.
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

### Step 2: Create a Placeholder Model in Vertex AI

You can create a referenced/placeholder model for your model outside Vertex AI using only a display name.

In [None]:
from google.cloud import aiplatform

MODEL_NAME = "penguins"  # @param {type:"string"}

# Only a display name is supplied (no artifact URI, no serving container),
# which registers a placeholder entry for the model hosted outside Vertex AI.
model = aiplatform.Model.upload(
    display_name=MODEL_NAME,
    sync=True,
)

### Step 3: Prepare your Baseline and Target Dataset

We recommend using BigQuery to store your production dataset. Please ensure that features are stored in separate columns. The following is an example BigQuery schema:

<!-- <img src="https://services.google.com/fh/gumdrop/preview/misc/example_bq_schema_2.png" width="400" height="300"/> -->
<img src="https://services.google.com/fh/files/misc/example_bq_schema_2.png" width="400" height="300"/>

Note: If you want to set up continuous monitoring with a time specification, for example a time window, a timestamp column is required.

Create some fake serving data for this tutorial:

In [None]:
import numpy as np
import pandas as pd

# Number of fake serving rows to generate.
num_random = 100000

# Integer-coded categorical columns and random numeric columns that mimic the
# penguins feature set; `predicted_species` plays the role of the model output.
data = {
    "island": np.random.randint(0, 3, size=num_random),
    "culmen_length_mm": np.random.normal(50, 3, num_random),
    "culmen_depth_mm": np.random.normal(20, 3, num_random),
    "flipper_length_mm": np.random.randint(160, 250, size=num_random),
    "body_mass_g": np.random.randint(3000, 8000, size=num_random),
    "sex": np.random.randint(0, 3, size=num_random),
    "predicted_species": np.random.randint(0, 6, size=num_random),
}

# Create a DataFrame from the generated data
df = pd.DataFrame(data)

# Timestamps are spread over [now - 24h, now + 24h) in UTC. The end of the
# range is derived from the start so the window is exactly 48 hours.
# `Timestamp.now(tz="UTC")` is used instead of the deprecated `Timestamp.utcnow()`.
start_date = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=1)
end_date = start_date + pd.Timedelta(days=2)

# Draw all whole-second offsets in one vectorized call rather than looping
# `num_random` times in Python. The window length is converted to int because
# `randint` expects an integer (exclusive) upper bound.
window_seconds = int((end_date - start_date).total_seconds())
random_offsets = pd.to_timedelta(
    np.random.randint(window_seconds, size=num_random), unit="s"
)

# Format each timestamp as a string with microseconds and a literal UTC suffix.
random_timestamps = (
    (start_date + random_offsets).strftime("%Y-%m-%d %H:%M:%S.%f UTC").tolist()
)

df["timestamp"] = random_timestamps

# Written to the working directory; loaded into BigQuery by a later cell.
df.to_csv("production.csv", index=False)

Create a BigQuery dataset and load the fake data to a table.

In [None]:
import pandas as pd

TIMESTAMP = pd.Timestamp.utcnow().strftime("%Y%m%d%H%M%S")

FAKE_DATA_BQ_DATASET = f"penguins_production_{TIMESTAMP}"
!bq mk --dataset $PROJECT_ID:$FAKE_DATA_BQ_DATASET

In [None]:
FAKE_DATA_BQ_TABLE = f"{FAKE_DATA_BQ_DATASET}.data"
!bq load --autodetect --source_format=CSV $FAKE_DATA_BQ_TABLE "production.csv"

Check the serving logging table.

In [None]:
import pandas as pd
import pandas_gbq

# Preview the ten most recent rows of the serving table.
query_string = f"SELECT * FROM `{FAKE_DATA_BQ_TABLE}` ORDER BY timestamp DESC LIMIT 10"

# `pandas.read_gbq` is deprecated (pandas 2.2+) in favor of calling the
# pandas-gbq package directly; pandas_gbq is installed by this notebook's
# package-installation cell.
pandas_gbq.read_gbq(query_string, project_id=PROJECT_ID)

### Step 4: Create a Model Monitor

Create a model monitor to associate monitoring details with the model version you just created.

#### Define Model Monitoring Schema

The monitoring schema is a required configuration for a model monitor. The schema consists of input features names, prediction outputs, and ground truth (if available), along with their respective data types.

**Note: The schema is optional only for AutoML tables (Regression/Classification), as it will be automatically fetched when available (if Model Monitoring is unable to retrieve the schema, you will need to provide your own).**

##### You can manually define the model schema as follows:

In [None]:
from vertexai.resources.preview import ml_monitoring

# (column name, monitoring data type) for every model input feature.
feature_column_types = [
    ("island", "categorical"),
    ("culmen_length_mm", "float"),
    ("culmen_depth_mm", "float"),
    ("flipper_length_mm", "integer"),
    ("body_mass_g", "integer"),
    ("sex", "categorical"),
]

# The schema lists the input features and the single prediction output.
MODEL_MONITORING_SCHEMA = ml_monitoring.spec.ModelMonitoringSchema(
    feature_fields=[
        ml_monitoring.spec.FieldSchema(name=column, data_type=kind)
        for column, kind in feature_column_types
    ],
    prediction_fields=[
        ml_monitoring.spec.FieldSchema(
            name="predicted_species",
            data_type="categorical",
        ),
    ],
)

##### Alternatively, if you have a large number of features, you can use the `transform_schema_from_bigquery` method to retrieve the schema and modify it as needed:

In [None]:
from vertexai.resources.preview.ml_monitoring.spec import schema

# Every column except `timestamp` is selected for schema inference.
schema_query = f"select * except(timestamp) from {FAKE_DATA_BQ_TABLE}"

MODEL_MONITORING_SCHEMA = schema.transform_schema_from_bigquery(
    prediction_fields="predicted_species",
    query=schema_query,
)

# Show the inferred schema so it can be reviewed before any adjustments.
print(MODEL_MONITORING_SCHEMA.to_json())

Modify the schema:

In [None]:
# Fields whose data type is changed to "categorical":
# the features `island` and `sex`, and the prediction output `predicted_species`.
CATEGORICAL_FIELD_NAMES = {"island", "sex", "predicted_species"}

# Match fields by name instead of by list position, so the override still
# reaches the intended columns if the order of the inferred schema differs.
for field in (
    *MODEL_MONITORING_SCHEMA.feature_fields,
    *MODEL_MONITORING_SCHEMA.prediction_fields,
):
    if field.name in CATEGORICAL_FIELD_NAMES:
        field.data_type = "categorical"

In [None]:
# Print the schema as JSON to review the current field data types.
print(MODEL_MONITORING_SCHEMA.to_json())

#### (Optional) Define the training dataset

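The training dataset can serve as the baseline dataset to calculate monitoring metrics. You can register the training dataset in the model monitor. Registering a training dataset is optional in general, but the later cells in this tutorial reference `TRAINING_DATASET`, so run the following cell if you intend to run the rest of the notebook unchanged.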

In [None]:
from vertexai.resources.preview import ml_monitoring

# Copy files to your projects gs bucket to avoid permission issues.
# Ignore any error(s) for bucket already exists.
PUBLIC_TRAINING_DATASET = (
    "gs://cloud-samples-data/vertex-ai/model-monitoring/penguins/penguins_training.csv"
)
TRAINING_URI = f"{BUCKET_URI}/model-monitoring/penguins/penguins_training.csv"

! gsutil copy $PUBLIC_TRAINING_DATASET $TRAINING_URI

TRAINING_DATASET = ml_monitoring.spec.MonitoringInput(
    gcs_uri=TRAINING_URI, data_format="csv"
)

#### Create a model monitor resource

A model monitor is a top-level resource to manage your metrics and model monitoring jobs.

In [None]:
from vertexai.resources.preview import ml_monitoring

# The monitor is tied to version "1" of the placeholder model and carries
# the training dataset and monitoring schema defined in the cells above.
monitor_settings = dict(
    project=PROJECT_ID,
    location=LOCATION,
    display_name="penguins_model_monitor",
    model_name=model.resource_name,
    model_version_id="1",
    training_dataset=TRAINING_DATASET,
    model_monitoring_schema=MODEL_MONITORING_SCHEMA,
)
my_model_monitor = ml_monitoring.ModelMonitor.create(**monitor_settings)

# Keep the monitor's name at hand for later reference.
MODEL_MONITOR_ID = my_model_monitor.name
print(f"MODEL MONITOR {MODEL_MONITOR_ID} created.")

### Step 5: Run an on-demand model monitoring job

#### Define the monitoring objective configs

For tabular models, Model Monitoring supports the following objectives:

*   **Input feature drift detection**

    Model Monitoring offers drift analysis for both categorical and numeric feature types, with the following supported metrics:

    *    Categorical Feature: `Jensen Shannon Divergence`, `L Infinity`
    *    Numeric Feature: `Jensen Shannon Divergence`

    You can choose to analyze only the features of interest by specifying them in the `features` fields of the `ml_monitoring.spec.DataDriftSpec` specification. If not specified, all input features in the model schema are analyzed. Additionally, you have the option to set default thresholds for categorical or numeric features, or you can specify thresholds for individual features. If the detected drift surpasses a threshold, an alert is sent through email or another notification channel.

*  **Prediction output drift detection**

    Similar to input feature drift detection, prediction output drift detection identifies data drift in the prediction outputs.

*   **Feature attribution drift detection**

    For a model outside Vertex AI, which has no model artifact or container information, feature attribution score drift detection is not supported.

In the following example, we set the `FEATURE_DRIFT_SPEC` and `PREDICTION_OUTPUT_DRIFT_SPEC`, which are later assembled in the `tabular_objective_spec`.

In [None]:
from vertexai.resources.preview import ml_monitoring

DEFAULT_THRESHOLD_VALUE = 0.001

# Alert thresholds for individual features, passed to the feature drift spec
# as `feature_alert_thresholds`.
FEATURE_THRESHOLDS = dict(
    culmen_length_mm=DEFAULT_THRESHOLD_VALUE,
    body_mass_g=0.002,
)

# Both specs use the same drift metric for each kind of field.
drift_metric_types = dict(
    categorical_metric_type="l_infinity",
    numeric_metric_type="jensen_shannon_divergence",
)

# Drift spec for the input features.
FEATURE_DRIFT_SPEC = ml_monitoring.spec.DataDriftSpec(
    default_categorical_alert_threshold=0.001,
    default_numeric_alert_threshold=0.002,
    feature_alert_thresholds=FEATURE_THRESHOLDS,
    **drift_metric_types,
)

# Drift spec for the prediction output (no per-field thresholds).
PREDICTION_OUTPUT_DRIFT_SPEC = ml_monitoring.spec.DataDriftSpec(
    default_categorical_alert_threshold=0.001,
    default_numeric_alert_threshold=0.001,
    **drift_metric_types,
)

#### Define the alert notification and metrics output spec.

We support various methods of notification:

*   Email
*   [Notification Channel](https://cloud.google.com/monitoring/support/notification-options)
*   [Cloud Logging](https://cloud.google.com/logging/docs)

In this notebook, we use email as an example.

In [None]:
import os

from vertexai.resources.preview import ml_monitoring

EMAIL = "[your-email-address]"  # @param {type:"string"}
# When the IS_TESTING environment variable is set, a no-reply address is used.
if os.environ.get("IS_TESTING"):
    EMAIL = "noreply@google.com"

# Metrics output goes under the tutorial bucket; alerts go to EMAIL.
OUTPUT_SPEC = ml_monitoring.spec.OutputSpec(gcs_base_dir=BUCKET_URI)
NOTIFICATION_SPEC = ml_monitoring.spec.NotificationSpec(user_emails=[EMAIL])

#### Run Model Monitoring Jobs

Let's start a monitoring job for the feature drift detection (comparing training data and serving data).
In this example, training data is a CSV file from Google Cloud Storage and the serving data is from BigQuery. We support two options for connection:

* table_uri: Consumes all the features from the table.
* query: Using a SQL query, you can select the features you are interested in analyzing. Be sure to include the timestamp column if you'd like to specify the data window or set up continuous monitoring.

**Example 1: Use BigQuery Table Uri**

In [None]:
import pandas as pd
from vertexai.resources.preview import ml_monitoring

# A UTC time suffix gives every job its own display name.
TIMESTAMP = pd.Timestamp.utcnow().strftime("%Y%m%d%H%M%S")
JOB_DISPLAY_NAME = "penguins_model_monitoring_job_" + TIMESTAMP

# Target data: the BigQuery serving table, referenced by its table URI.
table_target = ml_monitoring.spec.MonitoringInput(
    table_uri=f"bq://{PROJECT_ID}.{FAKE_DATA_BQ_TABLE}"
)

# Objective: input feature drift plus prediction output drift.
drift_objective = ml_monitoring.spec.TabularObjective(
    feature_drift_spec=FEATURE_DRIFT_SPEC,
    prediction_output_drift_spec=PREDICTION_OUTPUT_DRIFT_SPEC,
)

# The training dataset is the baseline the target data is compared against.
model_monitoring_job_1 = my_model_monitor.run(
    display_name=JOB_DISPLAY_NAME,
    baseline_dataset=TRAINING_DATASET,
    target_dataset=table_target,
    tabular_objective_spec=drift_objective,
    notification_spec=NOTIFICATION_SPEC,
    output_spec=OUTPUT_SPEC,
)

**Example 2: Use SQL Query**

Let's create another model monitoring job using a SQL query. Be sure to include the timestamp column if you'd like to specify a time window.

In [None]:
# A UTC time suffix gives every job its own display name.
TIMESTAMP = pd.Timestamp.utcnow().strftime("%Y%m%d%H%M%S")
JOB_DISPLAY_NAME = "penguins_model_monitoring_job_" + TIMESTAMP

# Target data: a SQL query selecting a subset of columns. The timestamp column
# is included because `timestamp_field` and `window` refer to it.
target_query = f"select island, culmen_length_mm, body_mass_g, predicted_species, timestamp from {PROJECT_ID}.{FAKE_DATA_BQ_TABLE}"
query_target = ml_monitoring.spec.MonitoringInput(
    query=target_query,
    timestamp_field="timestamp",
    window="2h",
)

# Objective: input feature drift plus prediction output drift.
drift_objective = ml_monitoring.spec.TabularObjective(
    feature_drift_spec=FEATURE_DRIFT_SPEC,
    prediction_output_drift_spec=PREDICTION_OUTPUT_DRIFT_SPEC,
)

# The training dataset is the baseline the target data is compared against.
model_monitoring_job_2 = my_model_monitor.run(
    display_name=JOB_DISPLAY_NAME,
    baseline_dataset=TRAINING_DATASET,
    target_dataset=query_target,
    tabular_objective_spec=drift_objective,
    notification_spec=NOTIFICATION_SPEC,
    output_spec=OUTPUT_SPEC,
)

In [None]:
# List the monitoring jobs of this model monitor; the result is shown as the cell output.
my_model_monitor.list_jobs()

### Step 6: Wait for the Model Monitoring Job to complete and verify the result

#### Check email

##### Once the model monitoring job begins running (it will start after the batch prediction jobs have finished), you will receive an email as follows:

<img src="https://services.google.com/fh/files/misc/create_job_email.png" />

##### Once the monitoring job is complete, should any anomalies be detected, you will receive an email similar to the following:

<img src="https://services.google.com/fh/files/misc/place_holder_job_anomalies.png" />

#### Check monitoring metrics: Google Cloud Console

To view Model Monitoring metrics in the [Google Cloud Console](https://console.cloud.google.com/vertex-ai/model-monitoring/model-monitors), go to the **Monitoring** tab under **Vertex AI.**

<img src="https://services.google.com/fh/files/misc/place_holder_job_details.gif" />

#### Check monitoring metrics: Cloud Storage bucket

Run the following to view Model Monitoring metrics stored in the Cloud Storage bucket.  

In [None]:
# Feature drift statistics of the first job. Any failure is printed rather
# than raised, so the notebook keeps running.
try:
    my_model_monitor.show_feature_drift_stats(model_monitoring_job_1.name)
except Exception as error:
    print(error)

In [None]:
# Prediction output drift statistics of the first job. Any failure is printed
# rather than raised, so the notebook keeps running.
try:
    my_model_monitor.show_output_drift_stats(model_monitoring_job_1.name)
except Exception as error:
    print(error)

In [None]:
# Feature drift statistics of the second job. Any failure is printed rather
# than raised, so the notebook keeps running.
try:
    my_model_monitor.show_feature_drift_stats(model_monitoring_job_2.name)
except Exception as error:
    print(error)

In [None]:
# Prediction output drift statistics of the second job. Any failure is printed
# rather than raised, so the notebook keeps running.
try:
    my_model_monitor.show_output_drift_stats(model_monitoring_job_2.name)
except Exception as error:
    print(error)

### Step 7: Schedule Continuous Model Monitoring

To set up continuous model monitoring, follow the example below to create a schedule. You can create multiple schedules for your model monitor.


The following example monitors drift in both input features and prediction outputs. The schedule is configured to activate the model monitoring job every hour on the hour, such as at 00:00, 01:00, and so on. Each job analyzes data from the preceding one-hour window. For instance, if a job is scheduled for 6:00 a.m., it analyzes the data collected from 5:00 a.m. to 6:00 a.m.

In [None]:
# Cron expression for the schedule: minute 0 of every hour (1:00, 2:00, ...).
CRON = "0 * * * *"  # @param {type:"string"}
# Display name given to the schedule created in the next cell.
SCHEDULE_DISPLAY_NAME = "penguins-continous-drift-detection"

In [None]:
# Baseline and target both read the BigQuery serving table.
serving_table_uri = f"bq://{PROJECT_ID}.{FAKE_DATA_BQ_TABLE}"

# Baseline: a one-hour window with a one-hour offset
# (presumably the hour before the target window; confirm against the SDK docs).
schedule_baseline = ml_monitoring.spec.MonitoringInput(
    table_uri=serving_table_uri,
    timestamp_field="timestamp",
    window="1h",
    offset="1h",
)

# Target: a one-hour window with no offset.
schedule_target = ml_monitoring.spec.MonitoringInput(
    table_uri=serving_table_uri,
    timestamp_field="timestamp",
    window="1h",
)

# Objective: input feature drift plus prediction output drift.
schedule_objective = ml_monitoring.spec.TabularObjective(
    feature_drift_spec=FEATURE_DRIFT_SPEC,
    prediction_output_drift_spec=PREDICTION_OUTPUT_DRIFT_SPEC,
)

# NOTIFICATION_SPEC and OUTPUT_SPEC were built earlier from the same EMAIL and
# BUCKET_URI values, so they are reused here.
model_monitoring_schedule = my_model_monitor.create_schedule(
    display_name=SCHEDULE_DISPLAY_NAME,
    cron=CRON,
    baseline_dataset=schedule_baseline,
    target_dataset=schedule_target,
    tabular_objective_spec=schedule_objective,
    notification_spec=NOTIFICATION_SPEC,
    output_spec=OUTPUT_SPEC,
)

# The schedule name is used by the pause / resume / update cells below.
SCHEDULE_RESOURCE_NAME = model_monitoring_schedule.name
print(f"Schedule {SCHEDULE_RESOURCE_NAME} created.")

#### Pause Schedule

Run the following to pause the model monitoring schedule:

In [None]:
# Pause the schedule created above, identified by its name.
my_model_monitor.pause_schedule(SCHEDULE_RESOURCE_NAME)

#### Resume Schedule

Run the following to resume a paused model monitoring schedule:

In [None]:
# Resume the schedule created above, identified by its name.
my_model_monitor.resume_schedule(SCHEDULE_RESOURCE_NAME)

#### Update Schedule

Run the following to update the model monitoring schedule:

In [None]:
# Update to run every 1 hour at :30, for example 0:30, 1:30, 2:30..
my_model_monitor.update_schedule(
    schedule_name=SCHEDULE_RESOURCE_NAME, cron="30 * * * *"
)

#### Check monitoring schedules in Google Cloud Console

To check your Model Monitoring schedule in the Google Cloud Console, go to the Monitor tab under Vertex AI.

<img src="https://services.google.com/fh/files/misc/place_holder_schedules.gif" />

### Step 8: Clean Up

If you no longer need your model monitoring resources, run the following to delete them:

In [None]:
from google.cloud import bigquery

# When no jobs are running, delete the model monitor.
my_model_monitor.delete(force=True)

# Delete the model.
model.delete()

# Delete BQ logging table.
bqclient = bigquery.Client(project=PROJECT_ID)
# Delete the dataset (including all tables)
bqclient.delete_dataset(FAKE_DATA_BQ_DATASET, delete_contents=True, not_found_ok=True)