In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# E2E ML on GCP: Get started with serving from Vertex AI Feature Store

<table align="left">
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage6/get_started_vertex_feature_store_serving.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
    
  <td>
        <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage6/get_started_vertex_feature_store_serving.ipynb">
        <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
        </a>
  </td>
    
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/ml_ops/stage6/get_started_vertex_feature_store_serving.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      Open in Vertex AI Workbench
    </a>
  </td>
    
</table>
<br/><br/><br/>

## Overview


This tutorial demonstrates how to use Vertex AI for E2E MLOps on Google Cloud in production. This tutorial covers stage 6: serving — get started with serving from Vertex AI Feature Store.

### Objective

In this tutorial, you learn how to use `Vertex AI Feature Store` to train a model and subsequently to serve features when doing online and batch prediction.

This tutorial uses the following Google Cloud ML services:

- `Vertex AI Feature Store`
- `Vertex AI Training`
- `Vertex AI Prediction`

The steps performed include:

- Creating a Vertex AI `Featurestore` resource.
    - Creating `EntityType` resources for the `Featurestore` resource.
    - Creating `Feature` resources for each `EntityType` resource.
- Import feature values (entity data items) into `Featurestore` resource.
    - From a Cloud Storage location.
    - From a pandas DataFrame.
- Perform online prediction from a `Featurestore` resource.
- Perform batch prediction from a `Featurestore` resource.

### Dataset

The dataset used in this notebook consists of order items data since 2018 for an online ecommerce
store. This dataset is publicly available at `bigquery-public-data.thelook_ecommerce.order_items`
BigQuery table which can be accessed by pinning the bigquery-public-data project in BigQuery.

The table consists of various fields related to each order item, such as the order_id, product_id,
user_id, status, and price, as well as when the item was created, shipped, and so on. Among these fields,
this notebook uses the following, assuming their purpose is as described below:

* user_id: The Id of the user.
* product_id: The Id of the product.
* created_at: When the user has placed the order.
* status: The status of the order (Shipped, Processing, Cancelled, Returned, and Complete).

The dataset is used to train a Recommender model.

### Costs
This tutorial uses billable components of Google Cloud:

- Vertex AI
- Cloud Storage
- BigQuery

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing) and [BigQuery pricing](https://cloud.google.com/bigquery/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Installations

Install the following packages, which are required to run this notebook.

In [None]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME") and not os.getenv("VIRTUAL_ENV")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

# Install the dependecies
! pip3 install --upgrade google-cloud-aiplatform \
                         google-cloud-bigquery \
                         pyarrow \
                         pandas {USER_FLAG} --quiet

### Restart the kernel

Once you've installed the additional packages, you need to restart the notebook kernel so it can find the packages.

In [None]:
import os

# The restart is skipped when IS_TESTING is set in the environment.
if not os.getenv("IS_TESTING"):
    import IPython

    # Shut the running kernel down with restart=True so that it comes back up
    # and can see the freshly installed packages.
    app = IPython.Application.instance()
    kernel = app.kernel
    kernel.do_shutdown(True)

## Before you begin

### Set up your Google Cloud project

**The following steps are required, regardless of your notebook environment.**

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

1. [Enable the Vertex AI, Compute Engine, Cloud Storage and Cloud Logging APIs](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component,storage_component,logging).

1. If you are running this notebook locally, you need to install the [Cloud SDK](https://cloud.google.com/sdk).

1. Enter your project ID in the cell below. Then run the cell to make sure the
Cloud SDK uses the right project for all the commands in this notebook.

**Note**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$` into these commands.

#### Set your project ID

**If you don't know your project ID**, you may be able to get it with the `gcloud` command below.

In [None]:
# Google Cloud project ID used throughout the notebook. If the placeholder is
# left unchanged, the following cell tries to read the project from gcloud.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

Set the default project ID in the current environment.

In [None]:
# Make PROJECT_ID the active project of the gcloud configuration.
! gcloud config set project $PROJECT_ID

#### Region

You can also change the `REGION` variable, which is used for operations
throughout the rest of this notebook.  Below are regions supported for Vertex AI. We recommend that you choose the region closest to you.

- Americas: `us-central1`
- Europe: `europe-west4`
- Asia Pacific: `asia-east1`

You may not use a multi-regional bucket for training with Vertex AI. Not all regions provide support for all Vertex AI services.

Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [None]:
# Region used for the regional resources created in this notebook.
REGION = "[your-region]"  # @param {type: "string"}

# An untouched placeholder falls back to us-central1.
if REGION == "[your-region]":
    REGION = "us-central1"

#### UUID

If you are in a live tutorial session, you might be using a shared test account or project. To avoid name collisions between users on resources created, you create a uuid for each instance session, and append it onto the name of resources you create in this tutorial.

In [None]:
import random
import string


def generate_uuid(length: int = 8) -> str:
    """Return a random identifier made of lowercase letters and digits.

    Args:
        length: Number of characters to generate (default 8).

    Returns:
        A string of `length` characters drawn, with replacement, from
        `string.ascii_lowercase + string.digits`.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)


# Session-wide suffix appended to resource names to avoid collisions.
UUID = generate_uuid()

### Authenticate your Google Cloud account

**If you are using Vertex AI Workbench Notebooks**, your environment is already authenticated. Skip this step.

**If you are using Colab**, run the cell below and follow the instructions when prompted to authenticate your account via oAuth.

**Otherwise**, follow these steps:

In the Cloud Console, go to the [Create service account key](https://console.cloud.google.com/apis/credentials/serviceaccountkey) page.

1. **Click Create service account**.

2. In the **Service account name** field, enter a name, and click **Create**.

3. In the **Grant this service account access to project** section, click the Role drop-down list. Type "Vertex" into the filter box, and select **Vertex Administrator**. Type "Storage Object Admin" into the filter box, and select **Storage Object Admin**.

4. Click Create. A JSON file that contains your key downloads to your local environment.

5. Enter the path to your service account key as the GOOGLE_APPLICATION_CREDENTIALS variable in the cell below and run the cell.

In [None]:
# IMPORTANT - If you are using Vertex AI Workbench Notebooks, your environment is already authenticated. Skip this step.

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your GCP account. This provides access to your
# Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

import os
import sys

# If on Vertex AI Workbench, then don't execute this code
IS_COLAB = "google.colab" in sys.modules
if not os.path.exists("/opt/deeplearning/metadata/env_version") and not os.getenv(
    "DL_ANACONDA_HOME"
):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this notebook locally, replace the string below with the
    # path to your service account key and run this cell to authenticate your GCP
    # account.
    elif not os.getenv("IS_TESTING"):
        %env GOOGLE_APPLICATION_CREDENTIALS ''

### Create a Cloud Storage bucket

**The following steps are required, regardless of your notebook environment.**

When you initialize the Vertex AI SDK for Python, you specify a Cloud Storage staging bucket. The staging bucket is where all the data associated with your dataset and model resources are retained across sessions.

Set the name of your Cloud Storage bucket below. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.

In [None]:
# Name of the Cloud Storage staging bucket (without the gs:// scheme) and the
# matching gs:// URI. If the placeholder is left unchanged, the next cell
# generates a name from the project ID and the session UUID.
BUCKET_NAME = "[your-bucket-name]"  # @param {type:"string"}
BUCKET_URI = f"gs://{BUCKET_NAME}"

In [None]:
# No usable bucket name was supplied: derive one from the project ID and the
# session UUID, and rebuild the URI to match.
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = f"{PROJECT_ID}vai-{UUID}"
    BUCKET_URI = f"gs://{BUCKET_NAME}"

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
# Create the bucket BUCKET_URI in REGION.
! gsutil mb -l $REGION $BUCKET_URI

Finally, validate access to your Cloud Storage bucket by examining its contents:

In [None]:
# List the bucket's contents to confirm that it exists and is accessible.
! gsutil ls -al $BUCKET_URI

#### Set bucket access for Feature Store

In [None]:
# Switch uniform bucket-level access on for the bucket.
! gsutil uniformbucketlevelaccess set on {BUCKET_URI}

### Import libraries and define constants

In [None]:
from datetime import datetime, timedelta

import google.cloud.aiplatform as aiplatform
import pandas as pd
from google.cloud import bigquery

### Initialize Vertex AI and BigQuery clients

In [None]:
# Register project, region and staging bucket as SDK-wide defaults. Passing
# `location` matters: SDK objects built later without an explicit location
# (for example `aiplatform.EntityType(...)`) resolve against this default, so
# it has to be the same REGION in which the Featurestore is created.
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

# BigQuery client used for querying the public dataset and loading tables.
bqclient = bigquery.Client(project=PROJECT_ID)

#### Set pre-built containers

Set the pre-built Docker container image for training and prediction.


For the latest list, see [Pre-built containers for training](https://cloud.google.com/vertex-ai/docs/training/pre-built-containers).


For the latest list, see [Pre-built containers for prediction](https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers).

In [None]:
# Image names spell the TensorFlow version with a dash ("2.8" -> "2-8").
TF = "2.8".replace(".", "-")
TRAIN_VERSION = f"tf-cpu.{TF}"
DEPLOY_VERSION = f"tf2-cpu.{TF}"

# The registry host is prefixed with the first dash-separated part of REGION
# (e.g. "us" for "us-central1").
registry_prefix = REGION.split("-")[0]
TRAIN_IMAGE = f"{registry_prefix}-docker.pkg.dev/vertex-ai/training/{TRAIN_VERSION}:latest"
DEPLOY_IMAGE = f"{registry_prefix}-docker.pkg.dev/vertex-ai/prediction/{DEPLOY_VERSION}:latest"

print("Training:", TRAIN_IMAGE)
print("Deployment:", DEPLOY_IMAGE)

#### Set machine type

Next, set the machine type to use for training.

- Set the variables `TRAIN_COMPUTE`/`DEPLOY_COMPUTE` to configure the compute resources for the VMs you will use for training and prediction.
 - `machine type`
     - `n1-standard`: 3.75GB of memory per vCPU.
     - `n1-highmem`: 6.5GB of memory per vCPU
     - `n1-highcpu`: 0.9 GB of memory per vCPU
 - `vCPUs`: number of \[2, 4, 8, 16, 32, 64, 96 \]

*Note: The following is not supported for training:*

 - `standard`: 2 vCPUs
 - `highcpu`: 2, 4 and 8 vCPUs

*Note: You may also use n2 and e2 machine types for training and deployment, but they do not support GPUs*.

In [None]:
# Machine family and vCPU count; joined below as "<family>-<vCPUs>".
MACHINE_TYPE = "n1-standard"
VCPU = "4"

# Training and deployment use the same machine shape.
TRAIN_COMPUTE = f"{MACHINE_TYPE}-{VCPU}"
DEPLOY_COMPUTE = f"{MACHINE_TYPE}-{VCPU}"

print("Train machine type", TRAIN_COMPUTE)
print("Deploy machine type", DEPLOY_COMPUTE)

## Introduction to Vertex AI Feature Store

Let's assume you have a recommendation model that predicts a coupon to print on the back of a cash register receipt. If that model is trained only on single transaction instances (what was bought and how much), then, as was traditionally done, you would use an Apriori algorithm.

But now we have historical data on the customer (say it's indexed by credit card number). Like total purchases to date, average purchase per transaction, frequency of purchase by product category, etc. We use this "enriched data" to train a recommender system.

Now it's time to do a live prediction. You get a transaction from the cash register, but all it has is the credit card number and this transaction. It does not have the enriched data the model needs. During serving, the credit card number is used as an index to Feature Store to get the enriched data needed for the model.

On the other hand, let's say the enriched data the model was trained on was timestamped on June 1st. The current transaction is from June 15th. Assume that the user has made other transactions between June 1st and 15th, and the enriched data has been continuously updated in Feature Store. But the model was trained on June 1st data. Feature Store knows the version number and serves the June 1st version to the model (not the current June 15th). Otherwise, if you used June 15th data, you would have training-serving skew.

Another problem here is the data drift. Things change and suddenly one day, everybody is buying toilet paper! There is a significant change in the distribution of existing enriched data from the distribution that the deployed model was trained on. Feature Store can detect changes/thresholds in distribution changes and trigger a notification for retraining the model.

Learn more about [Vertex AI Feature Store API](https://cloud.google.com/vertex-ai/docs/featurestore).

## Vertex AI Feature Store data model

Vertex AI Feature Store organizes data with the following 3 important hierarchical concepts:

        Featurestore -> EntityType -> Feature

- `Featurestore`: the place to store your features.
- `EntityType`: under a `Featurestore`, an `EntityType` describes an object to be modeled, real one or virtual one.
- `Feature`: under an `EntityType`, a `Feature` describes an attribute of the `EntityType`.

Learn more about [Vertex AI Feature Store data model](https://cloud.google.com/vertex-ai/docs/featurestore/concepts).

In this ecommerce example, you will create a `Featurestore` resource whose name starts with `ecomm_recommendation`. This `Featurestore` resource has 2 entity types:
- `users`: The entity type has the `product_id` and `rating` features.
- `products`: The entity type has the `users_list` feature.

## Create a `Featurestore` resource

First, you create a `Featurestore` for the dataset using the `Featurestore.create()` method, with the following parameters:

- `featurestore_id`: The name of the feature store.
- `online_store_fixed_node_count`: Configuration settings for online serving from the feature store.
- `project`: The project ID.
- `location`: The location (region).

In [None]:
# Featurestore ID: a fixed prefix followed by the per-session UUID.
FEATURESTORE_NAME = f"ecomm_recommendation{UUID}"

featurestore = aiplatform.Featurestore.create(
    project=PROJECT_ID,
    location=REGION,
    featurestore_id=FEATURESTORE_NAME,
    online_store_fixed_node_count=1,  # one fixed node for online serving
)

print(featurestore)

### Get a `Featurestore` resource

You can get a specified `Featurestore` resource in your project using the `Featurestore()` initializer, with the following parameters:

- `featurestore_name`: The name for the `Featurestore` resource.
- `project`: The project ID.
- `location`: The location (region).

In [None]:
# Look the existing Featurestore up by ID in the given project and region.
featurestore = aiplatform.Featurestore(
    project=PROJECT_ID,
    location=REGION,
    featurestore_name=FEATURESTORE_NAME,
)
print(featurestore)

## Create entity types for your `Featurestore` resource

Next, you create the `EntityType` resources for your `Featurestore` resource using the `create_entity_type()` method, with the following parameters:

- `entity_type_id`: The name of the `EntityType` resource.
- `description`:  A description of the entity type.

In [None]:
# Entity type ID -> description, created in this order.
entity_type_specs = {
    "users": "Description of the user",
    "products": "Description of the product",
}

for name, description in entity_type_specs.items():
    entity_type = featurestore.create_entity_type(
        description=description,
        entity_type_id=name,
    )
    print(entity_type)

### Add `Feature` resources for your `EntityType` resources

Next, you create the `Feature` resources for each of the `EntityType` resources in your `Featurestore` resource using the `create_feature()` method, with the following parameters:

- `feature_id`: The name of the `Feature` resource.
- `description`: A description of the feature.
- `value_type`: The data type for the feature.

In [None]:
def create_features(featurestore_name, entity_name, features):
    """Create and print `Feature` resources under one entity type.

    Args:
        featurestore_name: ID of the Featurestore that owns the entity type.
        entity_name: ID of the entity type to add the features to.
        features: Iterable of `(feature_id, description, value_type)` tuples,
            one per feature to create.
    """
    entity_type = aiplatform.EntityType(
        entity_type_name=entity_name, featurestore_id=featurestore_name
    )

    # Unpack each spec by name, and keep the created resource in its own
    # variable rather than rebinding the loop variable.
    for feature_id, description, value_type in features:
        created_feature = entity_type.create_feature(
            feature_id=feature_id, description=description, value_type=value_type
        )
        print(created_feature)


# Feature specs as (feature_id, description, value_type), grouped by entity type.
users_feature_specs = [
    ("product_id", "product description", "INT64"),
    ("rating", "rating of the product", "DOUBLE"),
]
products_feature_specs = [
    ("users_list", "List of user ids who bought product", "STRING_ARRAY"),
]

create_features(FEATURESTORE_NAME, "users", users_feature_specs)

create_features(FEATURESTORE_NAME, "products", products_feature_specs)

## Perform feature engineering on the dataset

Next you perform feature engineering on the public BigQuery dataset and then import them into Feature Store.

### Load the BigQuery dataset into a dataframe

* Load the data from BigQuery into a pandas dataFrame.
* Select the columns to use.
    - user_id
    - product_id
    - created_at
    - status

In [None]:
# Pull the four columns used by this notebook; user_id is cast to STRING.
query_string = """
SELECT
    CAST(user_id AS STRING) AS user_id,
    product_id,
    created_at,
    status
FROM
    `bigquery-public-data.thelook_ecommerce.order_items`
"""

# Run the query, wait for it to finish and materialise the rows as a DataFrame.
query_job = bqclient.query(query_string)
query_rows = query_job.result()
df_bq_table = query_rows.to_dataframe()

print(df_bq_table.shape)
df_bq_table.head()

### Derive a new column ratings

Next, you add a new column for the ratings. Since the ratings are numerical, you derive them from the existing status column, as follows:

- Map the status string values to a numerical range (0..4). 
- Normalize the values between 0 and 1.

In [None]:
# Map each order status to an ordinal score (0 = worst, 4 = best).
rating_map = {
    "Cancelled": 0,
    "Returned": 1,
    "Processing": 2,
    "Shipped": 3,
    "Complete": 4,
}

# Statuses that are not keys of rating_map become NaN here.
df_bq_table["rating"] = df_bq_table["status"].map(rating_map)
print(df_bq_table.head())

# Min-max normalize the ratings. Series.min()/max() ignore NaN, whereas the
# builtin min()/max() give order-dependent results once a NaN is present.
min_rating = df_bq_table["rating"].min()
max_rating = df_bq_table["rating"].max()

# Vectorized form of (x - min) / (max - min); avoids a Python call per row.
df_bq_table["rating"] = (df_bq_table["rating"] - min_rating) / (
    max_rating - min_rating
)
print(df_bq_table.head())

### Filter the dataset

Next, filter the dataset to only users who bought products until last week, and then drop the column 'status'.

In [None]:
# Cut-off one week back, taken in UTC. The value is kept timezone-naive so the
# "Z" suffix appended below labels it correctly; the local wall-clock time
# would be mislabelled as UTC on any machine not running in UTC.
PAST_WEEK_DATE = pd.Timestamp.now(tz="UTC").tz_localize(None) - pd.to_timedelta(
    "7day"
)

# Keep only the order items created before the cut-off.
df_filtered = df_bq_table[
    (df_bq_table["created_at"] < PAST_WEEK_DATE.isoformat() + "Z")
].reset_index()

# For every product, collect the list of user IDs that ordered it.
result = df_filtered.groupby(["product_id"])["user_id"].apply(list).to_dict()

df_prod_user_list = pd.DataFrame(result.items(), columns=["product_id", "users_list"])
df_prod_user_list["product_id"] = df_prod_user_list["product_id"].astype("string")
print(df_prod_user_list.head())

# Rebind instead of dropping in place, and tolerate the column already being
# gone, so that re-running this cell does not raise a KeyError.
df_bq_table = df_bq_table.drop(columns="status", errors="ignore")

### Reimport preprocessed data into BigQuery

#### Create destination table for preprocessed data.

Next, you create a BigQuery dataset where you will subsequently add tables for the preprocessed data.

In [None]:
# BigQuery dataset that receives the preprocessed tables; suffixed with the
# session UUID.
DESTINATION_DATASET = f"product_recommendation_{UUID}"

# Table with one row per order item (user, product, timestamp, rating).
USERS_SOURCE_TABLE_NAME = "user_prod_rating_data"
USERS_SOURCE_TABLE_URI = (
    f"bq://{PROJECT_ID}.{DESTINATION_DATASET}.{USERS_SOURCE_TABLE_NAME}"
)

# Table with one row per product and the list of users who ordered it.
PRODUCTS_SOURCE_TABLE_NAME = "prod_users_list_data"
PRODUCTS_SOURCE_TABLE_URI = (
    f"bq://{PROJECT_ID}.{DESTINATION_DATASET}.{PRODUCTS_SOURCE_TABLE_NAME}"
)

# Create destination dataset
dataset_id = "{}.{}".format(PROJECT_ID, DESTINATION_DATASET)
dataset = bigquery.Dataset(dataset_id)
dataset.location = REGION
# exists_ok=True matches the create_table() calls in the following cells and
# lets this cell be re-run without failing on an already existing dataset.
dataset = bqclient.create_dataset(dataset, exists_ok=True)
print(dataset)

#### Create table for filtered dataset

Next, you create a table and load the filtered dataset.

In [None]:
# Create a table
schema = [
    bigquery.SchemaField("user_id", "STRING"),
    bigquery.SchemaField("product_id", "INT64"),
    bigquery.SchemaField("created_at", "TIMESTAMP"),
    bigquery.SchemaField("rating", "FLOAT"),
]

table_id = f"{PROJECT_ID}.{DESTINATION_DATASET}.{USERS_SOURCE_TABLE_NAME}"
table = bigquery.Table(table_id, schema=schema)
bqclient.create_table(table, exists_ok=True)


# Load data to BQ
job = bqclient.load_table_from_dataframe(df_bq_table, table_id)
print(job.errors, job.state)
# result() blocks until the load job has finished and raises if it failed, so
# a broken load stops the notebook here instead of only being printed.
job.result()
print(job.errors, job.state)

#### Create table for the products user list.

Create the new table for the products users list table.

In [None]:
from time import sleep

# Create a table
schema = [
    bigquery.SchemaField("product_id", "STRING"),
    # REPEATED mode: each row holds a list of user IDs.
    bigquery.SchemaField("users_list", "STRING", "REPEATED"),
]
table_id = f"{PROJECT_ID}.{DESTINATION_DATASET}.{PRODUCTS_SOURCE_TABLE_NAME}"
table = bigquery.Table(table_id, schema=schema)
bqclient.create_table(table, exists_ok=True)

# Load data to BQ
job = bqclient.load_table_from_dataframe(df_prod_user_list, table_id)
print(job.errors, job.state)
# result() blocks until the load job has finished and raises if it failed, so
# a broken load stops the notebook here instead of only being printed.
job.result()
print(job.errors, job.state)

## Import the feature data into your `Featurestore` resource

Next, you import the feature data for your `Featurestore` resource. Once imported, you can use these feature values for online and offline (batch) serving.

### Data layout

Each imported `EntityType` resource data must have an ID. Also, each `EntityType` resource data item can optionally have a timestamp, specifying when the feature values were generated.

When importing, specify the following in your request:

- Data source format: BigQuery Table/Avro/CSV/Pandas Dataframe
- Data source URL
- Destination: featurestore/entity types/features to be imported

In this tutorial, the schema is:

    For the Users entity:
    schema = {
        "name": "users",
        "fields": [
            {
                "name":"product_id",
                "type":["null","integer"]
            },
            {
                "name":"rating",
                "type":["null","double"]
                },
        ]
    }
    
    For the Products entity:
    schema = {
        "name": "products",
        "fields": [
            {
                "name":"users_list",
                "type":["null","string_array"]
            }
        ]
    }


### Importing the feature values from BigQuery

You import the feature values for the `EntityType` resources using the `ingest_from_bq()` method, with the following parameters:

- `entity_id_field`: The identifier name for the parent `EntityType` resource.
- `feature_ids`: A list of identifier names for `Feature` resources' data to add to the `EntityType` resource.
- `feature_time`: The field corresponding to the timestamp for the features being entered.
- `bq_source_uri`: The BigQuery table to import data from

In [None]:
# Users: each row carries its own feature timestamp in the `created_at` column.
# The source URI is the constant defined next to the destination dataset.
entity_type = featurestore.get_entity_type("users")
response = entity_type.ingest_from_bq(
    entity_id_field="user_id",
    feature_ids=["product_id", "rating"],
    feature_time="created_at",
    bq_source_uri=USERS_SOURCE_TABLE_URI,
)
print(response)


def past_6days():
    """Return the current local time minus six days, as a naive datetime."""
    return datetime.now() - timedelta(days=6)


# Products: the source table has no timestamp column, so one timestamp is
# applied to every imported row.
entity_type = featurestore.get_entity_type("products")
response = entity_type.ingest_from_bq(
    entity_id_field="product_id",
    feature_ids=["users_list"],
    feature_time=past_6days(),
    bq_source_uri=PRODUCTS_SOURCE_TABLE_URI,
)
print(response)

## Vertex AI Feature Store serving

The Vertex AI Feature Store service provides the following two services for serving features from a `Featurestore` resource:

- Online serving - low-latency serving of small batches of features (prediction).

- Batch serving - high-throughput serving of large batches of features (training and prediction).

## Batch Serving

The Vertex AI Feature Store's batch serving service is optimized for serving large batches of features with high throughput, typically for training a model or batch prediction.

One can batch serve to the following destinations:

- BigQuery table
- Cloud Storage location
- Dataframe

### Output dataset

In this notebook, you train a model using data from your feature store, exported in CSV format to Cloud Storage.

### Use case

**The task** is to prepare a dataset to train a model, which recommends products for a given user. To achieve this, you need 2 sets of input:

*   Features: you already imported into the feature store.
*   Labels: the ground-truth data recorded that is rating.

To be more specific, the ground-truth observation is described in Table 1 and the desired dataset is described in Table 2. Each row in Table 2 is a result of joining the imported feature values from Vertex AI Feature Store according to the entity IDs and timestamps in Table 1. In this example,  the `product_id` and `rating` features from `users` are chosen to batch train. 

The `batch_serve_to_df` method takes Table 1 as input for the `read_instances_df` argument,
joins all required feature values from the feature store, and returns Table 2 for training.

<h4 align="center">Table 1. Ground-truth Data</h4>

users | timestamp            
----- | -------------------- 
87228 | 2022-07-01T00:00:00Z 
16173 | 2022-07-01T18:09:43Z 
...   | ...


<h4 align="center">Table 2. Expected Training Data Generated by batch_serve_to_df (Positive Samples)</h4>

feature_timestamp            | entity_type_users | product_id | rating |
-------------------- | ----------------- | --------------- | ---------------- |
2022-07-01T00:00:00Z | 87228 | 4567 | 0.5 |
2022-07-01T00:00:00Z | 16173 | 5490 | 0.75 |
... | ... | ... | ... |

#### Why timestamp?

Note that there is a `timestamp` column in Table 2. This indicates the time when the ground-truth was observed. This is to avoid data inconsistency.

For example, the 1st row of Table 2 indicates that the user with ID `87228` bought a product on `2022-07-01T00:00:00Z`. The feature store keeps feature values for all timestamps but fetches feature values *only* at the given timestamp during batch serving.

### Batch Serve To DataFrame

Assemble the request which specifies the following info:

*   Where is the label data, i.e., Table 1.
*   Which features are read, i.e., the feature columns in Table 2.

Next, you get the dataframe from the feature store using batch_serve_to_df and store it into a csv file that will be used for training the recommender model in Vertex AI.

* Select the entity type ID (`users`) and `timestamp` columns into the DataFrame that is passed as the read instances.

In [None]:
from datetime import timezone

# One-week cut-off taken in UTC and made naive again, so the "Z" suffix is
# accurate; the local wall-clock time would be mislabelled as UTC on any
# machine not running in UTC.
past_week_date = (
    datetime.now(timezone.utc).replace(tzinfo=None) - pd.to_timedelta("7day")
).isoformat() + "Z"

# Newest order items first; the entity ID column is renamed to the entity
# type ID ("users").
df_sorted = df_bq_table.sort_values("created_at", ascending=False, ignore_index=True)
df_sorted.rename(columns={"user_id": "users"}, inplace=True)
df_sorted = df_sorted[df_sorted["created_at"] <= past_week_date].reset_index()

# Round-trip through ISO strings to obtain UTC datetimes in a `timestamp` column.
df_sorted["created_at"] = df_sorted["created_at"].astype(str)
df_sorted["timestamp"] = df_sorted["created_at"].map(
    lambda x: datetime.fromisoformat(x).astimezone(timezone.utc)
)

# Read instances: one (entity ID, timestamp) pair per row.
df_batch = df_sorted[["users", "timestamp"]]

df_batch.head()

### Batch Read Feature Values

 You batch serve entity data items to a dataframe using the `batch_serve_to_df` method with the following parameters:

- `serving_feature_ids`: A dictionary of entity type and corresponding features to serve.
- `read_instances_df`: A pandas DataFrame holding the entity IDs and timestamps to read feature values for.

The output is returned as a pandas DataFrame.

In [None]:
# Features to read, keyed by entity type ID.
serving_feature_ids = {"users": ["product_id", "rating"]}

batch_serve = featurestore.batch_serve_to_df(
    read_instances_df=df_batch,
    serving_feature_ids=serving_feature_ids,
)

batch_serve.head()

### Export dataframe data to CSV

Next, you export the dataframe data to a CSV file in Cloud Storage.

In [None]:
# Destination object for the training data inside the staging bucket.
CSV_FILE = f"{BUCKET_URI}/data.csv"

# Write the served features without the DataFrame index.
# NOTE(review): pandas presumably needs the optional gcsfs package to write to
# a gs:// path, and the install cell does not list it — confirm it is present
# in the target environment.
batch_serve.to_csv(CSV_FILE, index=False)

## Train a recommender model

In this section, you train a custom model for recommending products for a given user with data from the `batch_serve_to_df` method.

You create a custom trained model from a Python script in a Docker container using the Vertex AI SDK for Python, and then get a prediction from the deployed model by sending data.

The steps performed include:

- Train a Vertex AI custom `TrainingPipeline` to train a TensorFlow model.
- Deploy the `Model` resource to a serving `Endpoint` resource.
- Make a prediction.


### Train a model

There are two ways you can train a model using a container image:

- **Use a Vertex AI pre-built container**. If you use a pre-built training container, you must additionally specify a Python package to install into the container image. This Python package contains your training code.

- **Use your own custom container image**. If you use your own container, the container image must contain your training code.

### Define the command args for the training script

Prepare the command-line arguments to pass to your training script.
- `args`: The command line arguments to pass to the corresponding Python module. In this example, they are:
  - `"--epochs=" + EPOCHS`: The number of epochs for training.
  - `"--batch_size=" + BATCH_SIZE`: The batch size for training.
  - `"--training_data=" + CSV_FILE`: The Cloud Storage path to the CSV file with the training data from the feature store.

In [None]:
# Hyperparameters forwarded to the training script.
EPOCHS = 20
BATCH_SIZE = 10

# Command-line arguments in "--name=value" form.
CMDARGS = [
    f"--epochs={EPOCHS}",
    f"--batch_size={BATCH_SIZE}",
    f"--training_data={CSV_FILE}",
]

#### Training script

Next, you write the contents of the training script, `task.py`. In summary, the script does the following:

- Loads the csv data from Google Cloud Storage.
- Builds a model using TF.Keras model API.
- Compiles the model (`compile()`).
- Trains the model (`fit()`) with epochs and batch size according to the arguments `args.epochs` and `args.batch_size`
- Gets the directory where to save the model artifacts from the environment variable `AIP_MODEL_DIR`. This variable is [set by the training service](https://cloud.google.com/vertex-ai/docs/training/code-requirements#environment-variables).
- Saves the trained model to the model directory.

In [None]:
%%writefile task.py

import argparse
import tensorflow as tf
import numpy as np
import os

import pandas as pd


# Read args
parser = argparse.ArgumentParser()
parser.add_argument('--epochs', dest='epochs',
                    default=10, type=int,
                    help='Number of epochs.')
parser.add_argument('--batch_size', dest='batch_size',
                    default=10, type=int,
                    help='Batch size.')
# Required: without it, pd.read_csv() below would be handed None and fail
# with an error that does not point at the missing flag.
parser.add_argument('--training_data', dest='training_data', type=str,
                    required=True,
                    help="URI of the CSV file with the training data")

args = parser.parse_args()


# Collect the arguments
training_data_uri = args.training_data

# Set up training variables
LABEL_COLUMN = "rating"
UNUSED_COLUMNS = ["timestamp","entity_type_users","product_id"]
# Cell values treated as missing by clean_dataframe()
NA_VALUES = ["NA", ".", " ", "", "null", "NaN"]

# Possible categorical values (not referenced elsewhere in this script)
RATING = [0,1,2,3,4]

# The value of --training_data is handed to pandas as-is
df_train = pd.read_csv(training_data_uri)

# Remove NA values
def clean_dataframe(df, na_values=None):
    """Return a new DataFrame without rows that contain missing values.

    Args:
        df: The DataFrame to clean. It is not modified.
        na_values: Cell values to treat as missing. Defaults to the
            module-level ``NA_VALUES`` list.

    Returns:
        A DataFrame in which every row holding a placeholder value or NaN
        has been dropped.
    """
    if na_values is None:
        na_values = NA_VALUES
    # np.nan rather than the np.NaN alias, which was removed in NumPy 2.0.
    return df.replace(to_replace=na_values, value=np.nan).dropna()

# Drop incomplete rows before building the ID vocabularies
df_train = clean_dataframe(df_train)

# Give every distinct raw ID an integer index, in the order unique() returns them
user_ids = df_train["entity_type_users"].unique().tolist()
product_ids = df_train["product_id"].unique().tolist()
user2user_encoded = dict(zip(user_ids, range(len(user_ids))))
product2product_encoded = dict(zip(product_ids, range(len(product_ids))))

# Encoded index columns; "user" is added before "product"
df_train["user"] = df_train["entity_type_users"].map(user2user_encoded)
df_train["product"] = df_train["product_id"].map(product2product_encoded)

# Vocabulary sizes, later passed to create_model()
NUM_USERS = len(user2user_encoded)
NUM_PRODUCTS = len(product2product_encoded)


def convert_dataframe_to_dataset(
    df_train,
):
    """Turn the prepared training frame into a ``tf.data.Dataset``.

    Args:
        df_train: DataFrame containing the columns listed in
            ``UNUSED_COLUMNS``, the ``LABEL_COLUMN`` column and the feature
            columns. It is not modified.

    Returns:
        An unbatched ``tf.data.Dataset`` of ``(features, label)`` pairs, with
        the label cast to float32.
    """
    # drop() returns a new frame, so the caller's DataFrame stays untouched.
    # The raw ID columns are discarded here, so they are not cast to float32
    # first (that cast would raise on non-numeric IDs).
    features = df_train.drop(columns=UNUSED_COLUMNS)
    labels = features.pop(LABEL_COLUMN)

    y_train = np.asarray(labels).astype("float32")

    # Convert to numpy representation; the remaining column order is kept
    x_train = np.asarray(features)

    dataset_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    return dataset_train

# Build the tf.data pipeline and shuffle it with a buffer as large as the frame
dataset_train = convert_dataframe_to_dataset(df_train).shuffle(len(df_train))

# Length of each user / product embedding vector
EMBEDDING_SIZE = 50
class RecommenderNet(tf.keras.Model):
    """Recommender that scores a (user, product) pair.

    The score is the dot product of a user embedding and a product embedding,
    plus a per-user and a per-product bias, passed through a sigmoid.

    Args:
        num_users: Number of rows in the user embedding and bias tables.
        num_products: Number of rows in the product embedding and bias tables.
        embedding_size: Length of each embedding vector.
        **kwargs: Forwarded to ``tf.keras.Model``.
    """

    def __init__(self, num_users, num_products, embedding_size, **kwargs):
        super().__init__(**kwargs)
        self.num_users = num_users
        self.num_products = num_products
        self.embedding_size = embedding_size
        self.user_embedding = tf.keras.layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6),
        )
        self.user_bias = tf.keras.layers.Embedding(num_users, 1)
        self.product_embedding = tf.keras.layers.Embedding(
            num_products,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6),
        )
        self.product_bias = tf.keras.layers.Embedding(num_products, 1)

    def call(self, inputs):
        """Score a batch of rows.

        Args:
            inputs: 2-D tensor whose column 0 holds the user index and whose
                column 1 holds the product index of each row.

        Returns:
            A ``(batch, 1)`` tensor of sigmoid outputs, one score per row.
        """
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        product_vector = self.product_embedding(inputs[:, 1])
        product_bias = self.product_bias(inputs[:, 1])
        # Per-row dot product. tf.tensordot(..., 2) contracted the batch axis
        # as well, producing a single scalar shared by every row in the batch.
        dot_user_product = tf.reduce_sum(
            user_vector * product_vector, axis=1, keepdims=True
        )
        # Add all the components (including bias)
        x = dot_user_product + user_bias + product_bias
        # The sigmoid activation forces the rating to between 0 and 1
        return tf.nn.sigmoid(x)

def create_model(num_users, num_products):
    """Build a ``RecommenderNet`` and compile it for training.

    Args:
        num_users: Number of distinct encoded users.
        num_products: Number of distinct encoded products.

    Returns:
        The compiled model, using binary cross-entropy loss and the Adam
        optimizer with a learning rate of 0.001.
    """
    # NOTE(review): binary cross-entropy expects targets in [0, 1]; confirm
    # that the label column is scaled accordingly.
    recommender = RecommenderNet(num_users, num_products, EMBEDDING_SIZE)
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    recommender.compile(loss=loss_fn, optimizer=adam)
    return recommender


# Instantiate the recommender, sized to the ID vocabularies
model = create_model(num_users=NUM_USERS, num_products=NUM_PRODUCTS)

# Group the examples into mini-batches of the requested size
dataset_train = dataset_train.batch(args.batch_size)

# Train the model
model.fit(dataset_train, epochs=args.epochs)

# Export a SavedModel to the directory named by AIP_MODEL_DIR
export_dir = os.getenv("AIP_MODEL_DIR")
tf.saved_model.save(model, export_dir)

### Train the model

Use the `CustomTrainingJob` class to define the `TrainingPipeline`. The class takes the following parameters:

- `display_name`: The user-defined name of this training pipeline.
- `script_path`: The local path to the training script.
- `container_uri`: The URI of the training container image.
- `requirements`: The list of Python package dependencies of the script.
- `model_serving_container_image_uri`: The URI of a container that can serve predictions for your model — either a pre-built container or a custom container.

Use the `run` function to start training. The function takes the following parameters:

- `args`: The command line arguments to be passed to the Python script.
- `replica_count`: The number of worker replicas.
- `model_display_name`: The display name of the `Model` if the script produces a managed `Model`.
- `machine_type`: The type of machine to use for training.
- `accelerator_type`: The hardware accelerator type.
- `accelerator_count`: The number of accelerators to attach to a worker replica.

The `run` function creates a training pipeline that trains and creates a `Model` object. After the training pipeline completes, the `run` function returns the `Model` object.

In [None]:
# Timestamped name used for both the training job and the Model it produces.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
DEPLOYED_NAME = f"product-recommender-{UUID}-{TIMESTAMP}"

# Define the training pipeline around the local task.py script.
job = aiplatform.CustomTrainingJob(
    display_name=DEPLOYED_NAME,
    script_path="task.py",
    requirements=["google-cloud-bigquery>=2.20.0", "db-dtypes"],
    container_uri=TRAIN_IMAGE,
    model_serving_container_image_uri=DEPLOY_IMAGE,
)

# Start the training; run() returns the Model once the pipeline completes.
model = job.run(
    args=CMDARGS,
    model_display_name=DEPLOYED_NAME,
    machine_type=TRAIN_COMPUTE,
    replica_count=1,
    accelerator_count=0,
)

### Deploy the model

Next, you deploy the trained model to an `Endpoint`. You can do this by calling the `deploy` function on the `Model` resource. This will do two things:

1. Create an `Endpoint` resource for deploying the `Model` resource.
2. Deploy the `Model` resource to the `Endpoint` resource.


The function takes the following parameters:

- `deployed_model_display_name`: A human-readable name for the deployed model.
- `traffic_split`: Percent of traffic at the endpoint that goes to this model, which is specified as a dictionary of one or more key/value pairs.
   - If only one model, then specify `{ "0": 100 }`, where "0" refers to this model being uploaded and 100 means 100% of the traffic.
   - If there are existing models on the endpoint, for which the traffic will be split, then use `model_id` to specify `{ "0": percent, model_id: percent, ... }`, where `model_id` is the ID of an existing `DeployedModel` on the endpoint. The percentages must add up to 100.
- `machine_type`: The type of machine to use for serving predictions.
- `accelerator_type`: The hardware accelerator type.
- `accelerator_count`: The number of accelerators to attach to a worker replica.
- `min_replica_count`: The minimum number of compute instances to provision.
- `max_replica_count`: The maximum number of compute instances to scale to. In this tutorial, only one instance is provisioned.

#### Traffic split

The `traffic_split` parameter is specified as a Python dictionary. You can deploy more than one instance of your model to an endpoint, and then set the percentage of traffic that goes to each instance.

You can use a traffic split to introduce a new model gradually into production. For example, if you had one existing model in production with 100% of the traffic, you could deploy a new model to the same endpoint, direct 10% of traffic to it, and reduce the original model's traffic to 90%. This allows you to monitor the new model's performance while minimizing the disruption to the majority of users.

#### Compute instance scaling

You can specify a single instance (or node) to serve your online prediction requests. This tutorial uses a single node, so the variables `MIN_NODES` and `MAX_NODES` are both set to `1`.

If you want to use multiple nodes to serve your online prediction requests, set `MAX_NODES` to the maximum number of nodes you want to use. Vertex AI auto-scales the number of nodes used to serve your predictions, up to the maximum number you set. Refer to the [pricing page](https://cloud.google.com/vertex-ai/pricing#prediction-prices) to understand the costs of autoscaling with multiple nodes.

#### Endpoint

The method will block until the model is deployed and eventually return an `Endpoint` object. If this is the first time a model is deployed to the endpoint, it may take a few additional minutes to complete the provisioning of resources.

In [None]:
# Fresh timestamped display name for the deployed model.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
DEPLOYED_NAME = f"product-recommender-{UUID}-{TIMESTAMP}"

# Single-node deployment: minimum and maximum replica counts are both 1.
MIN_NODES = 1
MAX_NODES = 1

# Route 100% of the endpoint traffic to the model being deployed ("0").
TRAFFIC_SPLIT = {"0": 100}

endpoint = model.deploy(
    machine_type=DEPLOY_COMPUTE,
    min_replica_count=MIN_NODES,
    max_replica_count=MAX_NODES,
    traffic_split=TRAFFIC_SPLIT,
    deployed_model_display_name=DEPLOYED_NAME,
)

## Make a prediction
Finally, you make an online prediction request to the recommender model that you deployed to the endpoint.

### Prepare the test item
You sample a random row from the exported CSV file and build one prediction instance per candidate product for that row's user.

In [None]:
import os

import numpy as np
import pandas as pd

# Column roles and NA placeholders, with the same values as in task.py
NA_VALUES = ["NA", ".", " ", "", "null", "NaN"]
UNUSED_COLUMNS = ["timestamp", "entity_type_users", "product_id"]
LABEL_COLUMN = "rating"

# Possible categorical values
RATING = list(range(5))

# Load the same CSV file that was passed to the training job
df_test = pd.read_csv(CSV_FILE)


# Remove NA values
def clean_dataframe(df, na_values=None):
    """Return a new DataFrame without rows that contain missing values.

    Args:
        df: The DataFrame to clean. It is not modified.
        na_values: Cell values to treat as missing. Defaults to the
            notebook-level ``NA_VALUES`` list.

    Returns:
        A DataFrame in which every row holding a placeholder value or NaN
        has been dropped.
    """
    if na_values is None:
        na_values = NA_VALUES
    # np.nan rather than the np.NaN alias, which was removed in NumPy 2.0.
    return df.replace(to_replace=na_values, value=np.nan).dropna()


df_test = clean_dataframe(df_test)

# Rebuild the raw-ID -> index vocabularies with the same steps task.py uses,
# plus the reverse product lookup needed to decode the predictions.
user_ids = df_test["entity_type_users"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
product_ids = df_test["product_id"].unique().tolist()
product_encoded2product = {i: x for i, x in enumerate(product_ids)}
product2product_encoded = {x: i for i, x in enumerate(product_ids)}

df_test["user"] = df_test["entity_type_users"].map(user2user_encoded)
df_test["product"] = df_test["product_id"].map(product2product_encoded)

# Pick one random row and take its user as the user to recommend for
sample = df_test.sample(1)
user_id = sample["user"].values[0]

# Every product this user has a row for, not only the product in the sampled
# row; otherwise the user's other products stay in the candidate list.
products_bought = (
    df_test.loc[df_test["user"] == user_id, "product"].unique().tolist()
)
products_not_bought = (
    df_test[~df_test["product"].isin(products_bought)]["product"].unique().tolist()
)

# One [user index, product index] instance per candidate product
instances_input = [[float(user_id), k] for k in products_not_bought]

### Send the prediction request
Next, you make the prediction request.

In [None]:
# Send all candidate [user, product] instances to the endpoint in one request.
prediction = endpoint.predict(instances=instances_input)
print(prediction)

### Getting Top 10 products recommendation
Based on the ratings predicted by the recommendation model, you select the top 10 products for the selected `user_id`.

In [None]:
# Take the first value of each prediction row as that instance's score
predictions_array = np.array([row[0] for row in prediction.predictions])

# Positions of the (up to) ten highest scores, best first
top_rating_indices = np.argsort(predictions_array)[-10:][::-1]
top_predictions = predictions_array[top_rating_indices]

# Map each selected instance's encoded product index back to its raw ID
top_10_products = []
for idx in top_rating_indices:
    encoded_product = instances_input[idx][1]
    top_10_products.append(int(product_encoded2product.get(encoded_product)))
print(top_10_products)

## Cleaning up
### Delete the BigQuery dataset

Use the method `delete_dataset()` to delete a BigQuery dataset along with all its tables, by setting the parameter `delete_contents` to `True`.

In [None]:
# Fully qualified ID of the dataset to remove: <project>.<dataset>
DESTINATION_DATASET = f"product_recommendation_{UUID}"
dataset_id = f"{PROJECT_ID}.{DESTINATION_DATASET}"

# delete_contents=True removes the dataset's tables along with the dataset
dataset = bigquery.Dataset(dataset_id)
bqclient.delete_dataset(dataset, delete_contents=True)

### Delete a `Featurestore` resource

You can delete a specified `Featurestore` resource using the `delete()` method, with the following parameter:

- `force`: A flag indicating whether to delete a non-empty `Featurestore` resource.

In [None]:
# force=True: delete the Featurestore even though it is not empty.
featurestore.delete(force=True)

### Delete the Vertex AI `Model` and `Endpoint`

Next, undeploy and delete the Vertex AI `Model` and `Endpoint` resources.

In [None]:
# Undeploy every model from the endpoint, then delete the endpoint and the model.
endpoint.undeploy_all()
endpoint.delete()
model.delete()

### Delete the Cloud Storage bucket
Finally, you delete the Cloud Storage bucket and everything stored in it.

In [None]:
! gsutil -m rm -r $BUCKET_URI
! gsutil rb $BUCKET_URI