In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# FraudFinder - BigQuery ML - Model training and prediction

<table align="left">
  <td>
    <a href="https://console.cloud.google.com/ai-platform/notebooks/deploy-notebook?download_url=https://github.com/GoogleCloudPlatform/fraudfinder/raw/main/bqml/04_model_training_and_prediction.ipynb">
       <img src="https://www.gstatic.com/cloud/images/navigation/vertex-ai.svg" alt="Google Cloud Notebooks">Open in Cloud Notebook
    </a>
  </td> 
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/fraudfinder/blob/main/bqml/04_model_training_and_prediction.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Open in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/fraudfinder/blob/main/bqml/04_model_training_and_prediction.ipynb">
        <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
</table>

## Overview

[FraudFinder](https://github.com/googlecloudplatform/fraudfinder) is a series of labs on how to build a real-time fraud detection system on Google Cloud. Throughout the FraudFinder labs, you will learn how to read historical bank transaction data stored in a data warehouse, read from a live stream of new transactions, perform exploratory data analysis (EDA), do feature engineering, ingest features into a feature store, train a model using feature store, register your model in a model registry, evaluate your model, deploy your model to an endpoint, do real-time inference on your model with feature store, and monitor your model.

### Objective

After feature engineering and ingesting data into a feature store, you are now ready to train and deploy your machine learning model to predict whether a transaction is fraudulent or not.

In this notebook, using the data that you previously ingested into Vertex AI Feature Store, you will train a model using BigQuery ML, register the model to Vertex AI Model Registry, and deploy it to an endpoint for real-time prediction.

In this tutorial, you will learn how to:

- Train a logistic regression model in BigQuery using BigQuery ML
- Evaluate the model
- Test a prediction 
- Deploy to an endpoint on Vertex AI
- Make an online prediction

This tutorial uses the following Google Cloud data analytics and services:

- [BigQuery](https://cloud.google.com/bigquery/)
- [BigQuery ML](https://cloud.google.com/bigquery-ml/)
- [Vertex AI](https://cloud.google.com/vertex-ai/)

The steps performed include:

- Using Python & SQL to query the data in BigQuery
- Preparing the data for modeling
- Training a classification model using BigQuery ML and registering it to Vertex AI Model Registry
- Inspecting the model on Vertex AI Model Registry
- Deploying the model to an endpoint on Vertex AI
- Making sample online predictions to the model endpoint

### Costs 

This tutorial uses billable components of Google Cloud:

* BigQuery
* BigQuery ML
* Vertex AI

Learn about [BigQuery Pricing](https://cloud.google.com/bigquery/pricing), [BigQuery ML pricing](https://cloud.google.com/bigquery-ml/pricing), [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

### Load configuration settings from the setup notebook

Set the constants used in this notebook and load the config settings from the `00_environment_setup.ipynb` notebook.

In [None]:
# Resolve the active gcloud project; the IPython `!` capture returns the command
# output as a list of lines, so the project ID is the first element.
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
BUCKET_NAME = f"{PROJECT_ID}-fraudfinder"
# Fetch the settings file from the project bucket (written by the setup notebook).
config = !gsutil cat gs://{BUCKET_NAME}/config/notebook_env.py
# `.n` is the captured output joined back into one newline-separated string.
print(config.n)
# NOTE(review): exec() runs whatever Python text is stored in that bucket object in
# this kernel. Only acceptable while the bucket is writable by trusted users only.
# Names used later but not defined in this notebook (e.g. REGION) presumably come
# from here — confirm against the setup notebook.
exec(config.n)

### Import libraries

In [None]:
import json
import math
from datetime import datetime, timedelta
from typing import Union

import pandas as pd
from google.api_core import exceptions as api_exceptions
from google.cloud import aiplatform as vertex_ai
from google.cloud import bigquery

pd.set_option("display.max_columns", None)

### Define constants

In [None]:
# The training window is a single day: yesterday. The date is computed once so
# that the start and end of the window cannot disagree if this cell happens to
# run across midnight.
END_DATE_TRAIN = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
START_DATE_TRAIN = END_DATE_TRAIN

# Compact YYYYMMDD form of the training date, used as a suffix in table names.
_TRAIN_DATE_SUFFIX = END_DATE_TRAIN.replace("-", "")

CUSTOMERS_TABLE_NAME = f"customers_{_TRAIN_DATE_SUFFIX}"
TERMINALS_TABLE_NAME = f"terminals_{_TRAIN_DATE_SUFFIX}"
VERTEX_AI_MODEL_ID = "bqml_fraud_classifier"
# "*" requests every feature of the given entity type.
SERVING_FEATURE_IDS = {"customer": ["*"], "terminal": ["*"]}
# Table holding the entities/timestamps used to query the feature store.
READ_INSTANCES_TABLE = f"ground_truth_{_TRAIN_DATE_SUFFIX}"
READ_INSTANCES_URI = f"bq://{PROJECT_ID}.tx.{READ_INSTANCES_TABLE}"
# Destination table for the feature values exported for training.
BQ_TABLE_NAME = f"train_table_{_TRAIN_DATE_SUFFIX}"
TRAIN_TABLE_URI = f"bq://{PROJECT_ID}.tx.{BQ_TABLE_NAME}"

print(f"""
START_DATE_TRAIN \t= {START_DATE_TRAIN}
END_DATE_TRAIN \t\t= {END_DATE_TRAIN}
CUSTOMERS_TABLE_NAME \t= {CUSTOMERS_TABLE_NAME}
TERMINALS_TABLE_NAME \t= {TERMINALS_TABLE_NAME}
VERTEX_AI_MODEL_ID \t= {VERTEX_AI_MODEL_ID}
SERVING_FEATURE_IDS \t= {SERVING_FEATURE_IDS}
READ_INSTANCES_TABLE \t= {READ_INSTANCES_TABLE}
READ_INSTANCES_URI \t= {READ_INSTANCES_URI}
BQ_TABLE_NAME \t\t= {BQ_TABLE_NAME}
TRAIN_TABLE_URI \t= {TRAIN_TABLE_URI}
""")

#### Payload schema

The following schema will be used later in this notebook for prediction.

In [None]:
# Feature name -> dtype name for the online-prediction payload, grouped by
# where each value comes from. The groups are merged in this order, so the
# resulting key order is: transaction, customer features, terminal features.

# Taken from the transaction itself.
_TX_SCHEMA = {
    "tx_amount": "float64",
}

# Customer aggregates: day windows first, then minute windows.
_CUSTOMER_SCHEMA = {
    "customer_id_nb_tx_1day_window": "int64",
    "customer_id_nb_tx_7day_window": "int64",
    "customer_id_nb_tx_14day_window": "int64",
    "customer_id_avg_amount_1day_window": "float64",
    "customer_id_avg_amount_7day_window": "float64",
    "customer_id_avg_amount_14day_window": "float64",
    "customer_id_nb_tx_15min_window": "int64",
    "customer_id_avg_amount_15min_window": "float64",
    "customer_id_nb_tx_30min_window": "int64",
    "customer_id_avg_amount_30min_window": "float64",
    "customer_id_nb_tx_60min_window": "int64",
    "customer_id_avg_amount_60min_window": "float64",
}

# Terminal aggregates: day windows first, then minute windows.
_TERMINAL_SCHEMA = {
    "terminal_id_nb_tx_1day_window": "int64",
    "terminal_id_nb_tx_7day_window": "int64",
    "terminal_id_nb_tx_14day_window": "int64",
    "terminal_id_risk_1day_window": "float64",
    "terminal_id_risk_7day_window": "float64",
    "terminal_id_risk_14day_window": "float64",
    "terminal_id_nb_tx_15min_window": "int64",
    "terminal_id_avg_amount_15min_window": "float64",
    "terminal_id_nb_tx_30min_window": "int64",
    "terminal_id_avg_amount_30min_window": "float64",
    "terminal_id_nb_tx_60min_window": "int64",
    "terminal_id_avg_amount_60min_window": "float64",
}

PAYLOAD_SCHEMA = {**_TX_SCHEMA, **_CUSTOMER_SCHEMA, **_TERMINAL_SCHEMA}

### Initialize Vertex AI and BigQuery SDKs for Python

Initialize the Vertex AI SDK for Python for your project and region.

In [None]:
# Configure the Vertex AI SDK with this notebook's project and region.
# NOTE(review): REGION is not assigned anywhere in this notebook; presumably it is
# defined by the config that was exec'd in the setup cell — confirm.
vertex_ai.init(project=PROJECT_ID, location=REGION)

### Helpers

Use a helper function for sending queries to BigQuery.

In [None]:
# Small BigQuery helper: validate the SQL with a dry run, execute it, and hand
# the result back as a pandas DataFrame
def run_bq_query(sql: str) -> Union[str, pd.DataFrame]:
    """
    Execute a SQL statement in BigQuery and return its result set.

    The statement is first submitted as a dry run (query cache disabled) so
    that an invalid query raises before any real job is started. It is then
    submitted again for real, and the finished job's ID is printed.

    Args:
        sql: SQL text to execute in BigQuery
    Returns:
        The rows produced by the job, converted through Arrow into a pandas
        DataFrame. Errors are not returned; they propagate as exceptions
        raised by the BigQuery client.
    """
    client = bigquery.Client()

    # Validation pass only: the dry-run job object is discarded on purpose.
    client.query(
        sql,
        job_config=bigquery.QueryJobConfig(dry_run=True, use_query_cache=False),
    )

    # Real execution with a default job configuration.
    query_job = client.query(sql, job_config=bigquery.QueryJobConfig())
    job_id = query_job.job_id

    # result() blocks until the job has finished; then convert the rows.
    result_frame = query_job.result().to_arrow().to_pandas()
    print(f"Finished job_id: {job_id}")
    return result_frame

## Fetching feature values for model training

In the previous labs `02_feature_engineering_batch.ipynb` and `03_feature_engineering_streaming.ipynb`, you computed features and ingested them into Vertex AI Feature Store. You can now look back into Feature Store to any point in time and output those values from Feature Store in batch to a BigQuery table in order to train your machine learning model.

Note that in these Fraudfinder labs, the ground-truth fraud labels are already available for all transactions, even as soon as a new transaction occurs. However, in practice, there is usually a delay (in the order of _days_) between when a transaction occurs and when it gets officially flagged as a fraudulent transaction by a fraud department. Because of this, if the data you use to train on doesn't yet have the up-to-date information about whether it's truly fraud or not, you may end up training an ML model on incorrect data (i.e. on transactions that are actually fraud, but were labeled as not-fraud at the time of training). If this is the case, then when you choose what data you use for training a model, be sure you use the point-in-time by "time-traveling" an appropriate number of days into the past to a date when you're confident that the labels should be accurate when fetching feature values from Vertex AI Feature Store. For this notebook, you will simply be "time traveling" to yesterday for simplicity.

To fetch data from Vertex AI Feature Store (via batch serving), you will need to provide the following inputs:

- a file containing a "query", with the entities and timestamps for each transaction
- a list of features to fetch values for
- the destination location and format where you want your feature values to be outputted to


### Preparing the data to query Vertex AI Feature Store (the "Read-Instance")

You will first need to prepare data that you want to use to query Vertex AI Feature Store. It will need to be a CSV file formatted like the table below:

|customer                     |terminal|timestamp                                    |
|-----------------------------|--------|---------------------------------------------|
|xxx3859                         |xxx8811    |2021-07-07 00:01:10 UTC                      |
|xxx4165                         |xxx8810    |2021-07-07 00:01:55 UTC                      |
|xxx2289                         |xxx2081    |2021-07-07 00:02:12 UTC                      |
|xxx3227                         |xxx3011    |2021-07-07 00:03:23 UTC                      |
|xxx2819                         |xxx6263    |2021-07-07 00:05:30 UTC                      |

where the column names are the names of the entity types in Feature Store and each timestamp represents the time an event occurred. Vertex AI Feature Store will then retrieve the last feature value up to (but not after) those timestamps.

In [None]:
# Build the read-instances table for the feature store query: one row per
# transaction whose timestamp falls on the training date, carrying the entity
# IDs (customer, terminal), the event timestamp, the amount and the fraud label.
# The label comes from a LEFT JOIN on TX_ID, so a transaction without a matching
# label row keeps a NULL tx_fraud.
read_instances_query = f"""
CREATE OR REPLACE TABLE `{PROJECT_ID}.tx.{READ_INSTANCES_TABLE}` as (
    SELECT
        raw_tx.TX_TS AS timestamp,
        raw_tx.CUSTOMER_ID AS customer,
        raw_tx.TERMINAL_ID AS terminal,
        raw_tx.TX_AMOUNT AS tx_amount,
        raw_lb.TX_FRAUD AS tx_fraud,
    FROM 
        tx.tx as raw_tx
    LEFT JOIN 
        tx.txlabels as raw_lb
    ON raw_tx.TX_ID = raw_lb.TX_ID
    WHERE
        DATE(raw_tx.TX_TS) = "{START_DATE_TRAIN}"
);
"""
print(read_instances_query)

# Create (or replace) the table, then preview ten rows; the last expression is
# what the cell displays.
run_bq_query(read_instances_query)
run_bq_query(f"SELECT * FROM `{PROJECT_ID}.tx.{READ_INSTANCES_TABLE}` LIMIT 10")

### Initiate Feature Store

Initiate the feature store you created in the `02_feature_engineering_batch.ipynb` notebook.

In [None]:
# Attach to the feature store created in the batch feature-engineering notebook.
# Constructing the Featurestore object fetches the resource from Vertex AI, so a
# missing feature store surfaces as a NotFound API error (not a NameError).
try:
    ff_feature_store = vertex_ai.Featurestore(f"{FEATURESTORE_ID}")
except api_exceptions.NotFound:
    print(f"""The feature store {FEATURESTORE_ID} does not exist!""")
    # Later cells need `ff_feature_store`; stop here rather than fail further down.
    raise
else:
    print(f"""The feature store {FEATURESTORE_ID} found!""")

### Export a sample of data to BigQuery using Vertex AI Feature Store's point-in-time capability

In this section, we will use batch serving of feature store to prepare a dataset for training by calling the `BatchReadFeatureValues` API. Batch serving is used to fetch a large set of feature values with high throughput, typically for training a model or batch prediction. For outputting the results to BigQuery, you will use the [`batch_serve_to_bq` method](https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Featurestore#google_cloud_aiplatform_Featurestore_batch_serve_to_bq).

In the following cell, based on the table you created above, you will query feature store for all of the relevant feature values, and export the feature values into a new table in BigQuery, which can then be used for model training later on.

In [None]:
# Batch-serve feature values for the rows of the read-instances table into the
# training table. SERVING_FEATURE_IDS asks for every ("*") feature of both the
# customer and terminal entity types.
# tx_amount and tx_fraud are declared as pass-through fields; presumably they
# are copied from the read-instances table into the output as-is — confirm
# against the batch_serve_to_bq documentation.
ff_feature_store.batch_serve_to_bq(
    bq_destination_output_uri=TRAIN_TABLE_URI,
    serving_feature_ids=SERVING_FEATURE_IDS,
    read_instances_uri=READ_INSTANCES_URI,
    pass_through_fields=["tx_amount", "tx_fraud"],
)

print(f"Feature values from feature store outputted to: {TRAIN_TABLE_URI}.")

### Inspect the resulting output table from Vertex AI Feature Store

Run the query below to inspect the training table. 

In [None]:
# Preview five rows of the training table produced by the batch-serving step.
# The table is referenced without a project prefix, so the BigQuery client's
# default project is used.
sql_inspect = f"""
SELECT
    *
FROM
    `tx.{BQ_TABLE_NAME}`
LIMIT
    5
"""
run_bq_query(sql_inspect)

As you can see, each row represents a transaction ID, and the columns represent the attributes of the transaction (i.e `customer_id`, `terminal_id`, `tx_amount`),  aggregated features (for the customer and terminal) and the ground-truth label `tx_fraud` (1 if fraud, else 0).

## Using BigQuery ML to train a machine learning model directly in BigQuery

BigQuery ML (BQML) provides the capability to train machine learning models, such as classification, regression, forecasting, and matrix factorization, in BigQuery using SQL syntax directly. BigQuery ML uses the scalable infrastructure of BigQuery so you don't need to set up additional infrastructure for training or batch serving.

Because the data from Vertex AI Feature Store was outputted to a BigQuery table, this makes the data conveniently available for training directly using BigQuery ML.

Learn more about [BigQuery ML documentation](https://cloud.google.com/bigquery-ml/docs).

### Train a logistic regression model using BigQuery ML

The query below trains a logistic regression model using BigQuery ML. BigQuery resources are used to train the model.

In the `OPTIONS` parameter:
* with `model_registry="vertex_ai"`, the BigQuery ML model will automatically be [registered to Vertex AI Model Registry](https://cloud.google.com/vertex-ai/docs/model-registry/model-registry-bqml), which enables you to view all of your registered models and their versions on Google Cloud in one place.

* `vertex_ai_model_version_aliases` allows you to set aliases to help you keep track of your model version ([documentation](https://cloud.google.com/vertex-ai/docs/model-registry/model-alias)).

In [None]:
# this cell may take ~4 min to run
# Train a logistic regression on the exported training table with tx_fraud as
# the label, and register the result in Vertex AI Model Registry under
# VERTEX_AI_MODEL_ID with the aliases 'logit' and 'experimental'.
# The timestamp and the two entity-ID columns are excluded from the inputs.
# NOTE(review): MODEL_NAME is not assigned in this notebook; presumably it comes
# from the config exec'd in the setup cell — confirm.
sql_train_model_bqml = f"""
CREATE OR REPLACE MODEL `tx.{MODEL_NAME}` 
OPTIONS(
  MODEL_TYPE="LOGISTIC_REG",
  INPUT_LABEL_COLS=["tx_fraud"],
  EARLY_STOP=TRUE,
  MIN_REL_PROGRESS=0.01,
  model_registry="vertex_ai", 
  vertex_ai_model_id='{VERTEX_AI_MODEL_ID}',
  vertex_ai_model_version_aliases=['logit', 'experimental']
) AS

SELECT
  * EXCEPT(timestamp, entity_type_customer, entity_type_terminal)
FROM
   `tx.{BQ_TABLE_NAME}`
"""

print(sql_train_model_bqml)

# run_bq_query blocks until the job has finished, so the message below is only
# printed once the CREATE MODEL statement has completed.
run_bq_query(sql_train_model_bqml)

print(f"Training job finished for: `tx.{MODEL_NAME}`")

#### Inspect the model on Vertex AI Model Registry
The model should now be automatically registered to Vertex AI Model Registry upon completion.

You can view the model on the <a href="https://console.cloud.google.com/vertex-ai/models" target="_blank">Vertex AI Model Registry page</a>, or use the code below to check that it was successfully registered:

In [None]:
# Initiate Vertex AI Model Registry for `VERTEX_AI_MODEL_ID` model entry
# (the same ID that the training query passed as vertex_ai_model_id).
registry = vertex_ai.models.ModelRegistry(VERTEX_AI_MODEL_ID)

In [None]:
# Get model versions
versions = registry.list_versions()

# Print one summary line per registered version: ID, creation time and aliases.
for version in versions:
    version_id = version.version_id
    # fromtimestamp() without a tz argument gives a naive datetime in the
    # kernel's local time zone, so the printed time is local time.
    version_created_time = datetime.fromtimestamp(
        version.version_create_time.timestamp()
    ).strftime("%m/%d/%Y %H:%M:%S")
    version_aliases = version.version_aliases
    print(
        f"Model version {version_id} was created at {version_created_time} with aliases {version_aliases}",
    )

In [None]:
# Get the model
# "logit" is one of the version aliases assigned by the training query, so this
# selects the model version carrying that alias.
model = registry.get_model(version="logit")
print(model)

### Model evaluation

With the model created, you can now evaluate the logistic regression model. Behind the scenes, BigQuery ML automatically [split the data](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-create#data_split_method), which makes it easier to quickly train and evaluate models.

In [None]:
# Ask BigQuery ML for the evaluation metrics of the trained model. No input
# table is passed to ML.EVALUATE here.
sql_evaluate_model = f"""
SELECT
  *
FROM
  ML.EVALUATE(MODEL tx.{MODEL_NAME})
"""

print(sql_evaluate_model)

# The returned DataFrame is the cell's displayed output.
run_bq_query(sql_evaluate_model)

These metrics help you understand the performance of the model. 

There are various metrics for logistic regression and other model types (full list of metrics can be found in the [documentation](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-evaluate#mlevaluate_output)).

### Batch prediction (with Explainable AI)

Make a batch prediction in BigQuery ML on the original training data to check the probability of each transaction being fraudulent, as seen in the `probability` column, with the predicted label under the `predicted_tx_fraud` column.

[ML.EXPLAIN_PREDICT](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict) has built-in [Explainable AI](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-xai-overview). This allows you to see the top contributing features to each prediction and interpret how it was computed.

In [None]:
# Run ML.EXPLAIN_PREDICT on ten rows of the training table to get predictions
# together with their feature attributions.
sql_explain_predict = f"""
SELECT
  *
FROM
  ML.EXPLAIN_PREDICT(MODEL tx.{MODEL_NAME},
    (SELECT * FROM  `tx.{BQ_TABLE_NAME}` LIMIT 10)
    )
"""

print(sql_explain_predict)

run_bq_query(sql_explain_predict)

Since the `top_feature_attributions` is a nested column, you can unnest the array ([documentation](https://cloud.google.com/bigquery/docs/reference/standard-sql/arrays)) into separate rows for each of the features. In other words, since ML.EXPLAIN_PREDICT provides the top 5 most important features, using `UNNEST` results in 5 rows per prediction:

In [None]:
# Same explanation query, flattened: UNNEST turns each prediction's
# top_feature_attributions array into one output row per attributed feature.
# Note that this reassigns `sql_explain_predict` from the previous cell, and
# that LIMIT 100 caps the number of unnested rows, not the number of predictions
# (the inner SELECT reads the whole training table).
sql_explain_predict = f"""
SELECT
  tfa.*,
  predicted_tx_fraud,
  probability,
  baseline_prediction_value,
  prediction_value,
  approximation_error,
FROM
  ML.EXPLAIN_PREDICT(MODEL tx.{MODEL_NAME},
    (SELECT * FROM `tx.{BQ_TABLE_NAME}` )
    ),
  UNNEST(top_feature_attributions) as tfa
LIMIT 100
"""

print(sql_explain_predict)

run_bq_query(sql_explain_predict)

### Deploy the model to an endpoint

While BigQuery ML supports batch prediction with [ML.PREDICT](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-predict) and [ML.EXPLAIN_PREDICT](https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-explain-predict), BigQuery ML is not suitable for real-time predictions where you need low latency predictions with potentially high frequency of requests.

In other words, deploying the BigQuery ML model to an endpoint enables you to do online predictions.

#### Create a Vertex AI endpoint

To deploy your model to an endpoint, you will first need to create an endpoint before you deploy the model to it.

In [None]:
# Create the Vertex AI endpoint that the model will be deployed to.
# NOTE(review): ENDPOINT_NAME and REGION are not assigned in this notebook;
# presumably they come from the config exec'd in the setup cell — confirm.
# NOTE(review): this calls create() unconditionally, so re-running the cell
# presumably creates an additional endpoint with the same display name — confirm.
endpoint = vertex_ai.Endpoint.create(
    display_name=ENDPOINT_NAME,
    project=PROJECT_ID,
    location=REGION,
)

print(endpoint.display_name)
print(endpoint.resource_name)

#### List endpoints

List the endpoints to make sure it has successfully been created. You can also view your endpoints on the [Vertex AI Endpoints page](https://console.cloud.google.com/vertex-ai/endpoints).

In [None]:
# List endpoints ordered by update time; `list` is invoked through the endpoint
# object created above and its return value is the cell's displayed output.
endpoint.list(order_by="update_time")

#### Deploy model to Vertex endpoint

With the model, you can now deploy it to an endpoint. 

In [None]:
# deploying the model to the endpoint may take 10-15 minutes
# `model` is the "logit" version fetched from the registry; `endpoint` is the
# endpoint created above. No other deployment options are passed.
model.deploy(endpoint=endpoint)

You can also check on the status of your model by visiting the [Vertex AI Endpoints page](https://console.cloud.google.com/vertex-ai/endpoints).

### Make online predictions to the endpoint with pub/sub -> pull subscription -> notebook -> Vertex AI endpoint
Using a sample of the training data, you can test the endpoint to make online predictions.

Below are some helper functions to help make it easier to read streaming data and do online predictions:

In [None]:
# A function to read some sample messages (transaction) from the public Pub/Sub
def read_from_sub(project_id, subscription_path, messages=10):
    """
    Pull messages from a Pub/Sub subscription, parse them, and acknowledge them.

    Args:
        project_id: project ID that owns the subscription
        subscription_path: ID (short name) of the Pub/Sub subscription, e.g.
            "ff-tx-sub"; the fully-qualified path is built from it and project_id
        messages: maximum number of messages to pull in the single pull request
    Returns:
        msg_data: list with one parsed Python object per received message
            (each message body is decoded as UTF-8 and parsed with
            ast.literal_eval). An empty list is returned when the pull yields
            no messages, so callers can always iterate over the result.
    """
    import ast

    from google.api_core import retry
    from google.cloud import pubsub_v1

    subscriber = pubsub_v1.SubscriberClient()
    # Keep the caller's short ID and the fully-qualified path in separate names.
    full_subscription_path = subscriber.subscription_path(
        project_id, subscription_path
    )

    # Wrap the subscriber in a 'with' block to automatically call close() to
    # close the underlying gRPC channel when done.
    with subscriber:
        # The subscriber pulls a specific number of messages. The actual
        # number of messages pulled may be smaller than max_messages.
        response = subscriber.pull(
            subscription=full_subscription_path,
            max_messages=messages,
            retry=retry.Retry(deadline=300),
        )

        received = response.received_messages
        if len(received) == 0:
            print("no messages")
            # Return an empty list (not None) so `for msg in read_from_sub(...)`
            # is a no-op instead of a TypeError.
            return []

        ack_ids = []
        msg_data = []
        for received_message in received:
            # literal_eval only accepts Python literals, so a message body
            # cannot execute code; a malformed body raises here, before any
            # message of this batch has been acknowledged.
            msg = ast.literal_eval(received_message.message.data.decode("utf-8"))
            print(f"Received: {msg}.")
            msg_data.append(msg)
            ack_ids.append(received_message.ack_id)

        # Acknowledges the received messages so they will not be sent again.
        subscriber.acknowledge(subscription=full_subscription_path, ack_ids=ack_ids)

        print(
            f"Received and acknowledged {len(received)} messages from {full_subscription_path}."
        )

        return msg_data


# A function for pre-processing of payload before sending it to a Vertex AI endpoint
def preprocess(payload):
    """
    Replace missing feature values in a payload with 0.0.

    A value counts as missing when it is None or a float NaN. All other
    values (including 0, strings and non-float objects) are left untouched.

    Args:
        payload: mapping of feature name -> value; it is modified in place
    Returns:
        payload: the same object that was passed in, with missing values
            replaced by 0.0
    """
    # Only existing keys are reassigned, so the mapping does not change size
    # while it is being iterated.
    for key, value in payload.items():
        # NaN must be detected explicitly: it is a float, not None, and it
        # never compares equal to anything. The isinstance guard keeps
        # math.isnan away from non-numeric values.
        is_nan = isinstance(value, float) and math.isnan(value)
        if value is None or is_nan:
            payload[key] = 0.0
    return payload


# A function to lookup features in Vertex AI Feature Store
def features_lookup(ff_feature_store, entity, entity_ids):
    """
    Read all features of one entity from Vertex AI Feature Store.

    Args:
        ff_feature_store: Feature Store object
        entity: entity type ID (e.g. "customer" or "terminal")
        entity_ids: list of entity IDs to read; only the first returned row
            is used
    Returns:
        features: dict of column name -> value for the first row returned by
            the read, after passing through preprocess()
    """
    entity_type = ff_feature_store.get_entity_type(entity)
    aggregated_features = entity_type.read(entity_ids=entity_ids, feature_ids="*")
    # Convert the row to a plain dict *before* cleaning. preprocess() walks
    # key/value pairs; on a DataFrame those pairs are (column, Series), so no
    # individual cell would ever be replaced.
    first_row = aggregated_features.iloc[0].to_dict()
    features = preprocess(first_row)
    return features

You can now read some messages from Pub/Sub, preprocess them, augment them with some features from Vertex AI Feature Store, and submit them to the Vertex AI endpoint for online predictions.

In [None]:
messages = read_from_sub(
    project_id=PROJECT_ID, subscription_path="ff-tx-sub", messages=10
)

# Guard against read_from_sub() handing back nothing (e.g. None) when the
# subscription is empty: `or []` makes the loop a no-op instead of a TypeError.
for payload_input in messages or []:
    print(f"----The raw transaction from Pub/Sub----")
    print(json.dumps(payload_input, indent=4))
    print(f"----The input payload to the Vertex AI endpoint:----")

    # look up the customer features from feature store (written by batch ingestion daily and by Dataflow in real-time)
    customer_features = features_lookup(
        ff_feature_store, "customer", [payload_input["CUSTOMER_ID"]]
    )

    # look up the terminal features from feature store (written by batch ingestion daily and by Dataflow in real-time)
    terminal_features = features_lookup(
        ff_feature_store, "terminal", [payload_input["TERMINAL_ID"]]
    )

    # Assemble the payload in PAYLOAD_SCHEMA order so the feature list lives in
    # one place: the amount comes from the transaction itself, everything else
    # from the matching feature-store lookup (selected by the name prefix).
    payload = {}
    for feature_name in PAYLOAD_SCHEMA:
        if feature_name == "tx_amount":
            payload[feature_name] = payload_input["TX_AMOUNT"]
        elif feature_name.startswith("customer_id_"):
            payload[feature_name] = customer_features[feature_name]
        elif feature_name.startswith("terminal_id_"):
            payload[feature_name] = terminal_features[feature_name]
        else:
            # A schema entry with no known source is a configuration error.
            raise KeyError(f"No source defined for payload feature {feature_name!r}")

    # Replace missing feature values before sending the instance.
    payload = preprocess(payload)

    print(json.dumps(payload, indent=4))
    print(f"----The prediction result:----")

    result = endpoint.predict(instances=[payload])
    print(json.dumps(result, indent=4))
    print(f"===============================================================")

### END

Now you can go to the next notebook `05_model_training_pipeline_formalization.ipynb`

### Clean up

In [None]:
# endpoint.undeploy_all()

In [None]:
# delete_model_sql = f"""
# DROP MODEL `tx.{MODEL_NAME}`
# """

# run_bq_query(delete_model_sql)