In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Fetch historical feature values

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/feature_store/offline_feature_serving_from_bigquery_with_feature_registry.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Ffeature_store%2Foffline_feature_serving_from_bigquery_with_feature_registry.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/feature_store/offline_feature_serving_from_bigquery_with_feature_registry.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/feature_store/offline_feature_serving_from_bigquery_with_feature_registry.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

## Overview

In this tutorial, you will learn how to use the Vertex AI SDK for Python to retrieve historical values from the feature data source in BigQuery.

This tutorial uses the following Google Cloud ML services and resources:

* Vertex AI Feature Store
* BigQuery

The steps performed include the following:

* Set up BigQuery data
* Set up Feature Registry
* Fetch historical feature values from feature data source in BigQuery
* Clean up

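### Objective

Fetch historical feature values for one or more entities from a feature group backed by a BigQuery table, by using the `fetch_historical_feature_values()` function in the Vertex AI SDK for Python.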

## Get started

### Install Vertex AI SDK for Python and other required packages


In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform bigframes

### Restart runtime (Colab only)

To use the newly installed packages, you must restart the runtime on Google Colab.

In [None]:
import sys

# Only Colab needs an explicit restart to pick up the freshly installed
# packages; in other environments this cell does nothing.
if "google.colab" in sys.modules:
    import IPython

    # Passing True requests a restart of the kernel instead of a plain shutdown.
    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

Authenticate your environment on Google Colab.


In [None]:
# `sys` is imported again because the previous cell restarts the kernel on Colab.
import sys

running_in_colab = "google.colab" in sys.modules

if running_in_colab:
    # The Colab auth helper is only importable inside a Colab runtime.
    from google.colab import auth as colab_auth

    colab_auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK for Python

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com). Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Google Cloud project and region used by the later cells of this notebook.
# Replace the placeholder project ID with your own before running.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}


import vertexai

# Initialize the Vertex AI SDK with the project and location chosen above.
vertexai.init(project=PROJECT_ID, location=LOCATION)

### Imports and IDs

Import the packages required to use the `fetch_historical_feature_values()`
function in the Vertex AI SDK for Python.

In [None]:
import bigframes
import bigframes.pandas
import pandas as pd
from google.cloud import bigquery
from vertexai.resources.preview.feature_store import (Feature, FeatureGroup,
                                                      offline_store)
from vertexai.resources.preview.feature_store import utils as fs_utils

The following variables set BigQuery and Feature Group resources that will be
used or created. If you'd like to use your own data source (CSV), please adjust
`DATA_SOURCE`.

In [None]:
# BigQuery dataset and table that will hold the feature data.
BQ_DATASET_ID = "fhfv_dataset_unique"  # @param {type:"string"}
BQ_TABLE_ID = "fhfv_table_unique"  # @param {type:"string"}
# Fully qualified table ID in `project.dataset.table` form.
BQ_TABLE_URI = f"{PROJECT_ID}.{BQ_DATASET_ID}.{BQ_TABLE_ID}"

# ID of the feature group created later in this notebook.
FEATURE_GROUP_ID = "fhfv_fg_unique"  # @param {type:"string"}

# Cloud Storage path of the CSV file that is loaded into the BigQuery table.
DATA_SOURCE = "gs://cloud-samples-data-us-central1/vertex-ai/feature-store/datasets/movie_prediction.csv"

## Create BigQuery table containing feature data

First we'll use BigQuery DataFrames to load in our CSV data source. Then we'll
rename the `timestamp` column to `feature_timestamp` to support usage as a
BigQuery source in Feature Registry.

In [None]:
# Open a BigQuery DataFrames session bound to the configured project and
# location.
session = bigframes.connect(
    bigframes.BigQueryOptions(
        project=PROJECT_ID,
        location=LOCATION,
    )
)

# Load the CSV data source into a BigQuery DataFrame.
df = session.read_csv(DATA_SOURCE)

# `df` is a BigQuery DataFrame, so convert the column with the BigQuery
# DataFrames implementation of `to_datetime` rather than the pandas one, which
# is written for pandas objects.
df["timestamp"] = bigframes.pandas.to_datetime(df["timestamp"], utc=True)

# Rename the column to `feature_timestamp` so the table can be used as a
# BigQuery source in Feature Registry.
df = df.rename(columns={"timestamp": "feature_timestamp"})

Let's preview the data we'll write to the table.

In [None]:
# Show the first few rows; kept as the last expression so the notebook renders it.
df.head()

And finally we'll write the DataFrame to the target BigQuery table.

In [None]:
# Write the DataFrame to the target table; `if_exists="replace"` overwrites the
# table if it is already there, so this cell can be re-run.
df.to_gbq(BQ_TABLE_URI, if_exists="replace")

## Create feature registry resources

Create a feature group backed by the BigQuery table created above.

In [None]:
# Point the feature group at the BigQuery table written above and use the
# `users` column as the entity ID column.
bq_source = fs_utils.FeatureGroupBigQuerySource(
    uri=f"bq://{BQ_TABLE_URI}",
    entity_id_columns=["users"],
)

# Create the feature group on top of that BigQuery source.
fg: FeatureGroup = FeatureGroup.create(FEATURE_GROUP_ID, bq_source)

Create the `movies` feature which corresponds to the `movies` column in the
recently created BigQuery table.

In [None]:
# Create the `movies` feature in the feature group; the handle is reused by the
# fetch and cleanup cells below.
movies_feature: Feature = fg.create_feature("movies")

## Fetch historical feature values

### Fetch historical feature values for an entity

The following will fetch historical feature values for the same entity (`alice`)
at two different timestamps. We expect the values of the `movies` feature at
each of those timestamps.

In [None]:
# Two lookups for the same entity (`alice`) at two different timestamps.
alice_lookup_times = [
    pd.Timestamp("2021-09-14T09:36"),
    pd.Timestamp("2023-12-12T13:13"),
]
entity_df = pd.DataFrame(
    {"users": ["alice", "alice"], "timestamp": alice_lookup_times}
)

# Kept as the last expression so the notebook displays the fetched values.
offline_store.fetch_historical_feature_values(
    features=[movies_feature],
    entity_df=entity_df,
)

### Fetch with multiple entities

The following will fetch historical feature values for two different entities
at different timestamps. We expect the values of the `movies` feature for each
entity at its corresponding timestamp.

In [None]:
# One lookup per entity: `alice` at the earlier timestamp, `bob` at the later one.
lookup_users = ["alice", "bob"]
lookup_times = [
    pd.Timestamp("2021-09-14T09:36"),
    pd.Timestamp("2023-12-12T13:13"),
]
entity_df = pd.DataFrame({"users": lookup_users, "timestamp": lookup_times})

# Kept as the last expression so the notebook displays the fetched values.
offline_store.fetch_historical_feature_values(
    features=[movies_feature],
    entity_df=entity_df,
)

## Cleaning up

### Delete feature and feature group

In [None]:
# Delete the `movies` feature that was created in the feature group.
movies_feature.delete()

In [None]:
# Delete the feature group itself.
# NOTE(review): presumably this must run after the feature is deleted — confirm.
fg.delete()

### Delete BigQuery dataset and table

In [None]:
# Bind the client to the notebook's project explicitly instead of relying on an
# ambient default project, which may not be set (for example on Colab).
client = bigquery.Client(project=PROJECT_ID)

In [None]:
# Delete the feature table; `not_found_ok=True` keeps the cleanup re-runnable
# when the table has already been removed.
client.delete_table(BQ_TABLE_URI, not_found_ok=True)

In [None]:
# Delete the dataset; `not_found_ok=True` keeps the cleanup re-runnable when the
# dataset has already been removed. Contents are not force-deleted, so a dataset
# that still holds other tables is left in place with an error.
client.delete_dataset(f"{PROJECT_ID}.{BQ_DATASET_ID}", not_found_ok=True)