In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Search and Conversation - Creating and Querying Datastores
In this notebook, we will show you how to:
1. Build a new Vertex Search DataStore
2. Add Site URIs to your DataStore
3. Create a new Engine for Vertex Search
4. Query your DataStore via the Search method.


## Prerequisites
- NOTE - For creating a new Engine, there is a hard requirement to use a Service Account Key.
- For all other methods, local ADC auth creds will work just fine.

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/dfcx-scrapi/blob/main/examples/vertex_ai_conversation/datastores_and_search.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/dfcx-scrapi/blob/main/examples/vertex_ai_conversation/datastores_and_search.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/dfcx-scrapi/main/examples/vertex_ai_conversation/datastores_and_search.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>

In [None]:
!pip install dfcx-scrapi>=1.10.6

from google.colab import auth as google_auth
google_auth.authenticate_user()

# Imports

In [1]:
from dfcx_scrapi.core.data_stores import DataStores
from dfcx_scrapi.core.engines import Engines
from dfcx_scrapi.core.sites import Sites
from dfcx_scrapi.core.search import Search

# User Inputs
In the next section, we will collect runtime variables needed to execute this notebook.   
This should be the only cell of the notebook you need to edit in order for this notebook to run.

In [2]:
# Google Cloud project ID passed to every SCRAPI client in this notebook.
PROJECT_ID = "YOUR_GCP_PROJECT_ID"
# Path to a Service Account key file. Only the Engine-creation cell reads it,
# but it must be defined or that cell fails with a NameError.
CREDS_PATH = "YOUR_CREDS_PATH_IF_CREATING_ENGINE"

# Create Data Store
If you're creating a Data Store for Vertex Conversation, ensure you have `advanced_site_search` = `True`.

In [3]:
# Data Store client constructed for the configured project.
ds = DataStores(project_id=PROJECT_ID)

In [None]:
# Create a website Data Store for chat in the "us" location.
ds.create_datastore(
    location="us",
    display_name="us-zone-scrapi-datastore",
    datastore_type="website",
    solution_type="chat",
    advanced_site_search=True,  # must be True for Vertex Conversation
)

## List Data Stores or Get Data Store Map
Use `list_data_stores` to confirm that your Data Store was created, or use the convenient `get_data_stores_map` to get an easy resource mapping for your Data Store Names/IDs

In [None]:
# Build the Data Store map for the "us" location. With reverse=True the
# notebook looks entries up by display name to obtain the Data Store ID.
ds_map = ds.get_data_stores_map(location="us", reverse=True)
ds_map

### Inspect Data Store Config

In [None]:
# Resolve the Data Store ID from its display name, then fetch and print the
# Data Store's configuration.
datastore_id = ds_map["us-zone-scrapi-datastore"]
datastore = ds.get_data_store(datastore_id)
print(datastore)

### Inspect Data Store Contents

In [None]:
# Sites client for working with the URI patterns attached to a Data Store.
sites = Sites(project_id=PROJECT_ID)

# Fetch the sites registered on the Data Store.
docs = sites.list_sites(ds_map["us-zone-scrapi-datastore"])

# Print each site's generated URI pattern and its last update time.
for doc in docs:
    print(f"SITE: {doc.generated_uri_pattern}\nLAST UPDATE: {doc.update_time}")

# Add URI Patterns to Data Store
For this example, we'll add 2 patterns:
- Include `www.example.com/blog/*`
- Exclude `www.example.com/news/*`

In [12]:
# Sites client used by the create/list calls in this section.
sites = Sites(project_id=PROJECT_ID)

In [None]:
# Add the include pattern for everything under www.example.com/blog/.
sites.create_site(
    data_store_id=ds_map["us-zone-scrapi-datastore"],
    uri_pattern="www.example.com/blog/*",
    exact_match=False,
    include_site=True,
)

In [None]:
# Add the exclude pattern. It uses the wildcard form `www.example.com/news/*`,
# matching the pattern announced in this section's description.
sites.create_site(
    data_store_id=ds_map["us-zone-scrapi-datastore"],
    uri_pattern="www.example.com/news/*",
    include_site=False,
    exact_match=False
)

## Inspect Data Store to See Sites Added
Notice that one of these is `VERIFIED` and another is `UNVERIFIED`.  
You will still need to use the Google Cloud Console to go through the verification process as usual.

You can also use `get_site_index_status` or `get_site_verification_status` to check on the status of Indexing and Verification programmatically.

In [None]:
# List the Data Store's sites again to confirm the new patterns are present.
sites.list_sites(ds_map["us-zone-scrapi-datastore"])

# Create Engine!
For the final step, we'll create our Chat Engine.

**NOTE** - Remember that for this step you MUST use a Service Account key!

In [None]:
# Engines client authenticated with a Service Account key. CREDS_PATH must be
# defined in the User Inputs cell and point to that key file before running
# this cell.
eng = Engines(PROJECT_ID, creds_path=CREDS_PATH)

In [None]:
# Build the chat Engine proto. The Data Store ID is looked up by display name
# in the map created earlier.
eng_proto = eng.build_chat_engine_proto(
    data_store_ids=[ds_map["us-zone-scrapi-datastore"]],
    display_name="my-cool-website-engine",
    business_name="Google Example",
)
eng_proto

In [None]:
# Create the Engine from the proto built in the previous cell.
eng.create_engine(eng_proto)

## List Engines
Finally, verify that your Engine was created successfully!  
You can use `list_engines` or `get_engines_map` to verify.

In [None]:
# Display the Engines map to verify the new Engine exists.
# NOTE(review): the Data Store map calls pass location="us" but this call
# passes no location -- confirm whether a matching location is needed here.
eng.get_engines_map(reverse=True)

# Search
In this final section, we'll run a search against our newly indexed datastore.

In [None]:
# Create the Search client and re-create the Data Store client and map used
# to resolve the Data Store ID for the search request.
s = Search()
ds = DataStores(project_id=PROJECT_ID)

ds_map = ds.get_data_stores_map(location="us", reverse=True)
ds_map

## Define Search Config
In order to get high quality search results from your new datastore, there are
many different config parameters you can provide for your `search_config`.  
The
full list can be found in the [SearchRequest](https://cloud.google.com/python/docs/reference/discoveryengine/latest/google.cloud.discoveryengine_v1beta.types.SearchRequest) documentation.

Here, we will provide a minimal config for demonstration purposes.

In [26]:
# Minimal search request: the Data Store to query and the query text.
search_config = dict(
    data_store_id=f"{ds_map['us-zone-scrapi-datastore']}",
    query="What is an example domain?",
)

In [None]:
# Run the search with the config defined in the previous cell; the result is
# displayed as the cell output.
s.search(search_config)

# Ending and Wrap-Up

In this notebook, we've shown how to programmatically build a DataStore, index some test websites, and query them using a search config.

For more information, see:
- [Vertex AI Search](https://cloud.google.com/generative-ai-app-builder/docs/try-enterprise-search)
- [Vertex AI Conversation](https://cloud.google.com/dialogflow/vertex/docs)