In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

<table align="left">

  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/master/notebooks/community/bigquery_ml/bqml-online-prediction.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/master/notebooks/community/bigquery_ml/bqml-online-prediction.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/master/notebooks/community/bigquery_ml/bqml-online-prediction.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      Open in Vertex AI Workbench
    </a>
  </td>                                                                                               
</table>

## Overview

[Fraudfinder](https://github.com/googlecloudplatform/fraudfinder) is a series of labs on how to build a real-time fraud detection system on Google Cloud. Throughout the Fraudfinder labs, you will learn how to read historical bank transaction data stored in a data warehouse, read from a live stream of new transactions, perform exploratory data analysis (EDA), do feature engineering, ingest features into a feature store, train a model using the feature store, register your model in a model registry, evaluate your model, deploy your model to an endpoint, do real-time inference on your model with the feature store, and monitor your model.

### Objective

In this notebook, you learn how to set up the Vertex AI Model Monitoring service to detect feature skew and drift in incoming prediction requests.

This tutorial uses the following Google Cloud ML services:

- Vertex AI Model Monitoring
- Vertex AI Prediction
- Vertex AI Model resource
- Vertex AI Endpoint resource

The steps performed include:

- Enable model monitoring for Endpoint resource.
- Detect skew and drift for feature inputs.
- Detect skew and drift for feature attributions.
- List, pause, resume and delete monitoring jobs.
- Restart monitoring job with predefined input schema.
- View logged monitored data

### Costs 

This tutorial uses billable components of Google Cloud:

* BigQuery
* BigQuery ML
* Vertex AI

Learn about [BigQuery Pricing](https://cloud.google.com/bigquery/pricing), [BigQuery ML pricing](https://cloud.google.com/bigquery-ml/pricing), [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing), and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

### Load config settings

In [None]:
# Resolve the active gcloud project; `!cmd` captures the command's stdout as a list of lines.
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
# The bucket name is derived from the project ID.
BUCKET_NAME = f"{PROJECT_ID}-fraudfinder"
# Fetch the shared notebook settings file from the bucket.
config = !gsutil cat gs://{BUCKET_NAME}/config/notebook_env.py
# `config.n` is the captured output joined with newlines (IPython SList).
print(config.n)
# NOTE(review): exec() runs whatever the bucket object contains. Later cells use names that are
# not defined in this notebook (REGION, MODEL_NAME, ENDPOINT_NAME, FEATURESTORE_ID, ID) —
# presumably they are defined by this settings file; confirm.
exec(config.n)

### Define constants

In [None]:
# BigQuery dataset and training-table naming.
BQ_DATASET = "tx"
END_DATE_TRAIN = "2022-01-31"
TRAIN_TABLE_NAME = "train_table_" + END_DATE_TRAIN.replace("-", "")

# Cloud Storage location built from the bucket and model name.
MODEL_ARTIFACT_URI = f"gs://{BUCKET_NAME}/deliverables/{MODEL_NAME}"

# Serving image: the registry host prefix is the first dash-separated token of REGION.
DEPLOY_VERSION = "tf2-cpu.2-5"
DEPLOY_IMAGE = f"{REGION.split('-')[0]}-docker.pkg.dev/vertex-ai/prediction/{DEPLOY_VERSION}:latest"

# Deployment sizing (min == max, so a single replica).
DEPLOY_MACHINE_TYPE = "n1-standard-4"
MIN_REPLICA_COUNT = 1
MAX_REPLICA_COUNT = 1

# Feature ids requested for the customer entity.
CUSTOMER_FEATURES = [
    "customer_id_nb_tx_1day_window",
    "customer_id_nb_tx_60min_window",
    "customer_id_avg_amount_7day_window",
    "customer_id_nb_tx_14day_window",
    "customer_id_avg_amount_30min_window",
    "customer_id_nb_tx_15min_window",
    "customer_id_nb_tx_7day_window",
    "customer_id_avg_amount_15min_window",
    "customer_id_avg_amount_14day_window",
    "customer_id_avg_amount_1day_window",
    "customer_id_avg_amount_60min_window",
    "customer_id_nb_tx_30min_window",
]

# Feature ids requested for the terminal entity.
TERMINAL_FEATURES = [
    "terminal_id_risk_7day_window",
    "terminal_id_nb_tx_60min_window",
    "terminal_id_nb_tx_1day_window",
    "terminal_id_nb_tx_15min_window",
    "terminal_id_avg_amount_30min_window",
    "terminal_id_nb_tx_14day_window",
    "terminal_id_risk_14day_window",
    "terminal_id_avg_amount_15min_window",
    "terminal_id_nb_tx_7day_window",
    "terminal_id_nb_tx_30min_window",
    "terminal_id_avg_amount_60min_window",
    "terminal_id_risk_1day_window",
]

### Import libraries

In [None]:
# General
import random
import time
import uuid
from datetime import datetime, timezone
from typing import Dict, List, Union

import pandas as pd

# BigQuery
from google.cloud import bigquery

# Vertex AI
from google.cloud import aiplatform as vertex_ai
from google.cloud.aiplatform import model_monitoring

### Initialize Vertex AI and BigQuery SDKs for Python

Initialize the Vertex AI SDK for Python for your project and region.

In [None]:
# Initialize the SDK with the project and region loaded from the config settings.
vertex_ai.init(project=PROJECT_ID, location=REGION)

Create the BigQuery client.

In [None]:
# Notebook-level client; get_entity_ids and the BigQuery ML model lookup further down use it.
bq_client = bigquery.Client(project=PROJECT_ID)

### Helpers

Use a helper function for sending queries to BigQuery.

In [None]:
# Wrapper to use BigQuery client to run query/job, return job ID or result as DF
def run_bq_query(
    sql: str, project: str, region: str, return_df=False
) -> Union[str, pd.DataFrame]:
    """
    A helper function to run a BigQuery query.

    The query is submitted as a dry run first so that errors surface before
    the real job is started.

    Args:
        sql: BigQuery query
        project: project id used to create the BigQuery client
        region: region; kept for interface compatibility, not applied to the job
        return_df: return a dataframe or not
    Returns:
        The query result as a DataFrame when return_df is True, otherwise the
        id of the finished job.
    """

    # Create a BigQuery client bound to the requested project.
    client = bigquery.Client(project=project)

    # Try dry run before executing query to catch any errors
    dry_run_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
    client.query(sql, job_config=dry_run_config)

    # Proceed to run query and block until it finishes
    query_job = client.query(sql, job_config=bigquery.QueryJobConfig())
    result = query_job.result()
    job_id = query_job.job_id
    print(f"Finished job_id: {job_id}")

    if return_df:
        # Get & return data frame
        return result.to_arrow().to_pandas()

    # Without a DataFrame request, hand back the job id promised by the signature.
    return job_id

def get_entity_ids(dataset_uri: str, n_entities:int, entity_name_flag: str) -> List[str]:
    """
    A helper function to collect entity ids from a BigQuery table.

    Args:
        dataset_uri: table reference string parsed with TableReference.from_string
        n_entities: maximum number of rows to fetch
        entity_name_flag: name of the column whose values are collected
    Returns:
        entity_ids: values of the matching column, in row order; rows without
            that column contribute nothing
    """
    # Uses the notebook-level bq_client to list at most n_entities rows.
    table_ref = bigquery.TableReference.from_string(dataset_uri)
    fetched_rows = bq_client.list_rows(table_ref, max_results=n_entities)

    return [
        value
        for row in fetched_rows
        for column, value in row.items()
        if column == entity_name_flag
    ]

def read_entity_features(entity_name: str, entity_ids: List[str],
                         feature_ids: List[str]) -> pd.DataFrame:
    """
    A helper function to read entity features from Vertex AI Feature store.

    Args:
        entity_name: name passed to vertex_ai.EntityType to look up the entity type
        entity_ids: list of entity ids
        feature_ids: list of feature ids
    Returns:
        entity_features: DataFrame with the values returned by the feature
            store. If the read raises NameError, a zero-filled placeholder is
            returned instead, with one row per entity id, an id column named
            after `entity.name`, and one column per feature id.
    """
    entity = vertex_ai.EntityType(entity_name)
    try:
        entity_features = entity.read(
            entity_ids=entity_ids,
            feature_ids=feature_ids,
        )
    except NameError:
        # NOTE(review): NameError is kept from the original handler; confirm it is the
        # exception actually raised for missing entities.
        print(f"The entity_ids {', '.join(entity_ids)} does not exist in the feature store")
        # Zero placeholder only when the read failed (previously it also replaced
        # successfully read values). One row per id keeps row width equal to the columns.
        entity_features = pd.DataFrame(
            data=[[entity_id] + [0] * len(feature_ids) for entity_id in entity_ids],
            columns=[entity.name] + feature_ids,
        )
    else:
        # NOTE(review): the SDK read is understood to label the id column "entity_id";
        # rename it to match the placeholder layout that generate_online_sample merges on.
        # The rename is a no-op if that column is absent — confirm against the SDK.
        entity_features = entity_features.rename(columns={"entity_id": entity.name})
    return entity_features


def generate_online_transaction(customer_id: str, terminal_id: str) -> pd.DataFrame:
    """
    A helper function to generate a random sample for online prediction.

    Args:
        customer_id: customer id
        terminal_id: terminal id
    Returns:
        online_sample: single-row DataFrame with columns tx_id, tx_ts,
            customer_id, terminal_id and tx_amount
    """
    online_tx = {
        # Random hex identifier so each generated transaction is distinguishable.
        "tx_id": uuid.uuid4().hex,
        # Timezone-aware "now" in UTC; the format string appends the literal "UTC".
        "tx_ts": datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'),
        "customer_id": customer_id,
        "terminal_id": terminal_id,
        # NOTE(review): 956.13 is an unexplained upper bound — presumably taken from
        # the training data; confirm.
        "tx_amount": round(random.uniform(0, 956.13), 3),
    }
    return pd.DataFrame([online_tx])

def generate_online_sample(customer_features: pd.DataFrame, terminal_features: pd.DataFrame,
                           online_transaction: pd.DataFrame) -> pd.DataFrame:
    """
    A helper function to join an online transaction with its customer and terminal features.

    Args:
        customer_features: customer features, keyed by a "customer" column
        terminal_features: terminal features, keyed by a "terminal" column
        online_transaction: online transactions with "customer_id" and "terminal_id" columns
    Returns:
        online_sample: merged DataFrame without the tx_id, tx_ts, customer and
            terminal columns
    """
    # Columns needed only for joining or bookkeeping; removed from the final sample.
    helper_columns = ['tx_id', 'tx_ts', 'customer', 'terminal']

    return (
        online_transaction
        .merge(customer_features, left_on="customer_id", right_on="customer")
        .merge(terminal_features, left_on="terminal_id", right_on="terminal")
        .drop(columns=helper_columns)
    )

### Monitor your model with Vertex AI Model Monitoring

With Vertex AI Model Monitoring, you can detect skew and drift in the predictions, the features, and their attributions (Explainable AI) for incoming prediction requests.

With custom models, the model monitoring service requires:

- for drift detection, the schema of the features to derive the feature values

- for skew detection, a training data sample as baseline to calculate the distribution

- for feature attribution skew and drift detection, Vertex Explainable AI to be configured. 

The following sections cover each of these requirements.

#### Configure Vertex Explainable AI for feature attribution skew and drift detection

To configure Vertex Explainable AI for feature attribution skew and drift detection in our case, you need to:

- Define your explainability specification
- Upload the model including the explainability specification

Then, you need to pass the explainability specification to the model monitoring job. 

##### Define your explainability specification

In [None]:
# Model id in the form <project>.<dataset>.<model>.
model_id = f"{PROJECT_ID}.{BQ_DATASET}.{MODEL_NAME}"
# Fetch the model object; its feature and label columns are read right after.
bqml_model = bq_client.get_model(model_id)

In [None]:
# Feature names in the order of bqml_model.feature_columns.
index_feature_mapping = [feature.name for feature in bqml_model.feature_columns]
# The first label column is treated as the prediction target.
label_name = bqml_model.label_columns[0].name

In [None]:
# Sampled Shapley attribution with a path count of 10.
explanation_params = vertex_ai.explain.ExplanationParameters(
    {"sampled_shapley_attribution": {"path_count": 10}}
)

# Every feature is mapped to an input tensor carrying the same name.
explanation_inputs = {
    feature_name: {"input_tensor_name": feature_name}
    for feature_name in index_feature_mapping
}
# The label is mapped to an output tensor carrying the same name.
explanation_outputs = {label_name: {"output_tensor_name": label_name}}

explanation_metadata = vertex_ai.explain.ExplanationMetadata(
    inputs=explanation_inputs,
    outputs=explanation_outputs,
)

##### Upload the model with explainability specification as new default version

In [None]:
# Look up an already registered model by display name and take the last entry returned.
# NOTE(review): nothing is uploaded here and no explanation spec is attached; the listing
# order is not specified in this call, so [-1] may not be the newest model — confirm.
model = vertex_ai.Model.list(filter=f"display_name={MODEL_NAME}")[-1]

#### Deploy the Vertex AI Model to a Vertex AI Endpoint

In [None]:
# Reuse an existing endpoint matched by display name (last entry returned).
# Raises IndexError when no endpoint with that display name exists.
endpoint = vertex_ai.Endpoint.list(filter=f"display_name={ENDPOINT_NAME}")[-1]

In [None]:
# Deploy the model to the endpoint and send all traffic to it; sync=True blocks until done.
model.deploy(
        endpoint=endpoint,
        deployed_model_display_name="fraud_detector_" + ID,
        machine_type=DEPLOY_MACHINE_TYPE,
        min_replica_count=MIN_REPLICA_COUNT,
        max_replica_count=MAX_REPLICA_COUNT,
        # Explanation arguments are disabled, so explanation_params / explanation_metadata
        # defined earlier are not used by this deployment.
        # explanation_parameters=explanation_params,
        # explanation_metadata=explanation_metadata,
        traffic_percentage = 100,
        sync=True
    )

#### Generate Predictions

In [None]:
# Initiate the feature store
ff_feature_store = vertex_ai.Featurestore(featurestore_name=FEATURESTORE_ID)

# Get the entity type ids
# Picks the first entity type whose resource name contains "customer" / "terminal";
# raises IndexError if none matches.
customer_entity_type = [entity.resource_name for entity in ff_feature_store.list_entity_types() if 'customer' in entity.resource_name][0]
terminal_entity_type = [entity.resource_name for entity in ff_feature_store.list_entity_types() if 'terminal' in entity.resource_name][0]

# Get the entity ids
# Both lists come from at most 10 rows of the training table.
dataset_id = f"{PROJECT_ID}.{BQ_DATASET}.{TRAIN_TABLE_NAME}"
customer_ids = get_entity_ids(dataset_id, 10, "entity_type_customer")
terminal_ids = get_entity_ids(dataset_id, 10, "entity_type_terminal")

# Collect the online transactions
# zip pairs the i-th customer id with the i-th terminal id and stops at the shorter list.
# The loop variables stay defined at notebook level after this cell runs.
online_samples = []
for c_id, t_id in zip(customer_ids, terminal_ids):
    online_transaction = generate_online_transaction(c_id, t_id)
    # Read the customer and terminal entity features
    c_features = read_entity_features(customer_entity_type, [c_id], CUSTOMER_FEATURES)
    t_features = read_entity_features(terminal_entity_type, [t_id], TERMINAL_FEATURES)
    # Merge the customer and terminal features with the online transaction
    online_sample = generate_online_sample(
        c_features,
        t_features,
        online_transaction,
    )
    online_samples.append(online_sample)

In [None]:
# Run the online prediction request for the first five samples.
for online_sample in online_samples[:5]:
    # Handle failures per request: one failing request is reported and the
    # remaining samples are still sent (previously the first failure ended the loop).
    try:
        # The id columns are not model inputs, so they are dropped from the instance.
        prediction_instance = online_sample.drop(["customer_id", "terminal_id"], axis=1).to_dict('records')
        prediction = endpoint.predict(prediction_instance)
        print(
            f"Prediction request: customer_id - {online_sample.customer_id.values} - terminal_id - {online_sample.terminal_id.values} - prediction - {prediction[0][0]['predicted_tx_fraud']} \n"
        )
        # Pause briefly between successful requests.
        time.sleep(1)
    except Exception as exception:
        print(f"Prediction request: customer_id - {online_sample.customer_id.values} - terminal_id - {online_sample.terminal_id.values} failed.", "Exception:", exception)

In [None]:
# Run the online prediction request with explanation
# for online_sample in online_samples:
#     prediction_instance = online_sample.to_dict('records')
#     prediction = endpoint.explain(prediction_instance)
#     # time.sleep(1)

#### Define and create a Model Monitoring job

To set up either skew detection or drift detection, create a model deployment monitoring job. 

The job requires the following specifications:

- `alert_config`: Configures how alerts are sent to the user. Currently, only email alerts are supported.
- `schedule_config`: Configures model monitoring job scheduling interval in hours. This defines how often the monitoring jobs are triggered.
- `logging_sampling_strategy`: Sample Strategy for logging.
- `drift_config`
- `skew_config`

##### Define the alerting configuration

In [None]:
# Placeholder recipients — replace with real addresses before creating the job.
user_emails = ['recipient1@domain.com', 'recipient2@domain.com']
# NOTE(review): enable_logging=True presumably also records alerts in Cloud Logging — confirm.
alert_config = model_monitoring.EmailAlertConfig(user_emails, enable_logging=True)

##### Define the schedule configuration

In [None]:
# Interval between monitoring runs, in hours.
monitor_interval = 1
schedule_config = model_monitoring.ScheduleConfig(monitor_interval=monitor_interval)

##### Define the logging sample strategy

In [None]:
# Sampling rate for logging prediction requests — presumably a fraction in [0, 1]; confirm.
sample_rate = 0.5 
logging_sampling_strategy = model_monitoring.RandomSampleConfig(sample_rate=sample_rate)

##### Define the drift detection configuration

In [None]:
drift_threshold_value = 0.05
attribution_drift_threshold_value = 0.05

# Features monitored for drift; both threshold maps share exactly these keys.
drift_monitored_features = [
    "tx_amount",
    "customer_id_nb_tx_1day_window",
    "customer_id_avg_amount_1day_window",
    "customer_id_nb_tx_15min_window",
    "customer_id_avg_amount_15min_window",
    "terminal_id_nb_tx_1day_window",
    "terminal_id_risk_1day_window",
    "terminal_id_nb_tx_15min_window",
    "terminal_id_avg_amount_15min_window",
]

# One threshold per feature, all set to the same value.
drift_thresholds = dict.fromkeys(drift_monitored_features, drift_threshold_value)
attribution_drift_thresholds = dict.fromkeys(
    drift_monitored_features, attribution_drift_threshold_value
)

drift_config = model_monitoring.DriftDetectionConfig(
    drift_thresholds=drift_thresholds,
    attribute_drift_thresholds=attribution_drift_thresholds,
)

##### Define the skew detection configuration

In [None]:
# Training table used as the baseline for skew detection, and its label column.
train_data_source_uri = f"bq://{PROJECT_ID}.{BQ_DATASET}.{TRAIN_TABLE_NAME}"
target = "tx_fraud"
skew_threshold_value = 0.5
attribute_skew_threshold_value = 0.5

# Features monitored for skew; both threshold maps share exactly these keys.
skew_monitored_features = [
    "tx_amount",
    "customer_id_nb_tx_1day_window",
    "customer_id_avg_amount_1day_window",
    "customer_id_nb_tx_15min_window",
    "customer_id_avg_amount_15min_window",
    "terminal_id_nb_tx_1day_window",
    "terminal_id_risk_1day_window",
    "terminal_id_nb_tx_15min_window",
    "terminal_id_avg_amount_15min_window",
]

# One threshold per feature, all set to the same value.
skew_thresholds = dict.fromkeys(skew_monitored_features, skew_threshold_value)
attribute_skew_thresholds = dict.fromkeys(
    skew_monitored_features, attribute_skew_threshold_value
)

skew_config = model_monitoring.SkewDetectionConfig(
    data_source=train_data_source_uri,
    skew_thresholds=skew_thresholds,
    attribute_skew_thresholds=attribute_skew_thresholds,
    target_field=target,
)

##### Create the job configuration

In [None]:
# The explanation config is disabled here, in line with the commented-out explanation
# arguments in the deploy call.
# NOTE(review): the attribution thresholds set in the skew/drift configs presumably have
# no effect while this stays disabled — confirm.
# explanation_config = model_monitoring.ExplanationConfig()

# Bundle the skew and drift settings into the single objective passed to the monitoring job.
objective_config = model_monitoring.ObjectiveConfig(
    skew_detection_config=skew_config,
    drift_detection_config=drift_config,
    # explanation_config=explanation_config,
)

##### Create the model monitoring job

In [None]:
# Create the monitoring job on the endpoint from the configurations defined so far.
# The display name is "fraud_detection_" followed by ID; any later lookup by display
# name has to use the same string.
monitoring_job = vertex_ai.ModelDeploymentMonitoringJob.create(
    display_name="fraud_detection_" + ID,
    project=PROJECT_ID,
    location=REGION,
    endpoint=endpoint,
    logging_sampling_strategy=logging_sampling_strategy,
    schedule_config=schedule_config,
    alert_config=alert_config,
    objective_configs=objective_config,
)

##### Check the monitoring job state

In [None]:
# The job is created with display_name "fraud_detection_" + ID, so the filter needs the
# same underscore; without it no job matches and jobs[0] raises IndexError.
jobs = monitoring_job.list(filter=f"display_name=fraud_detection_{ID}")
job = jobs[0]
print(job.state)

#### Make the prediction requests

In [None]:
# Send every collected sample to the endpoint so the monitoring job has requests to analyse.
for online_sample in online_samples:
    # Handle failures per request: one failing request is reported and the
    # remaining samples are still sent (previously the first failure ended the loop).
    try:
        # The id columns are not model inputs, so they are dropped from the instance.
        prediction_instance = online_sample.drop(["customer_id", "terminal_id"], axis=1).to_dict('records')
        prediction = endpoint.predict(prediction_instance)
        print(
            f"Prediction request: customer_id - {online_sample.customer_id.values} - terminal_id - {online_sample.terminal_id.values} - prediction - {prediction[0][0]['predicted_tx_fraud']} \n"
        )
        # Pause briefly between successful requests.
        time.sleep(1)
    except Exception as exception:
        print(f"Prediction request: customer_id - {online_sample.customer_id.values} - terminal_id - {online_sample.terminal_id.values} failed.", "Exception:", exception)

## (DO NOT RUN) Cleaning up

#### Delete the monitoring job

In [None]:
# monitoring_job.pause()
# monitoring_job.delete()

#### Undeploy the model and delete the endpoint

In [None]:
# endpoint.undeploy_all()
# endpoint.delete()