### Create a Search App with Mixed Datastores

1. Follow the steps listed here to create a Search App https://cloud.google.com/generative-ai-app-builder/docs/create-engine-es 
2. Create the relevant data stores (GCS, BigQuery, Website): https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es
3. Link the data stores to the Search App: https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores

### Install the Relevant packages

In [None]:
!pip install google-cloud-discoveryengine

### Import the Relevant packages

In [4]:
#import the relevant library
#ignore the warnings

from typing import List
from google.api_core.client_options import ClientOptions
from google.cloud import discoveryengine as discoveryengine
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Part
import vertexai.preview.generative_models as generative_models
import requests
import subprocess  # To obtain the access token
import re
import json



In [22]:
# Notebook configuration. Every value below is a placeholder: replace it with
# the value for your own environment before running the remaining cells.

# Google Cloud project that owns the Search App and is used for Vertex AI.
PROJECT_ID = "PROJECT_ID"

# Location of the Search App. Replace with exactly one location
# (for example "global" or "us"); the placeholder text itself is not valid.
SEARCH_APP_LOCATION = "global or us"

# ID of the Vertex AI Search engine (app) to query.
SEARCH_ENGINE_ID = "VERTEX_SEARCH_ENGINE_ID"

# Region passed to vertexai.init() for the Gemini model.
# NOTE: the trailing lowercase "l" in the name is kept as-is because other
# cells reference this exact identifier.
LOCATION_GEMINI_MODEl = "northamerica-northeast1"

### Send a Request to Vertex Search App with Data Blending(Mixed Datastore) 
#### https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores

In [None]:
# Send one search request to the Vertex AI Search app over REST and keep the
# parsed JSON body in `response_data` for the cells that follow.

# Obtain an OAuth access token from the locally authenticated gcloud CLI.
access_token = (
    subprocess.check_output(["gcloud", "auth", "print-access-token"])
    .decode("utf-8")
    .strip()
)

# Multi-region apps (e.g. "us", "eu") are served from a location-prefixed
# host; only "global" uses the bare discoveryengine.googleapis.com endpoint.
# Sending a non-global location to the global host is rejected by the API.
api_host = (
    "discoveryengine.googleapis.com"
    if SEARCH_APP_LOCATION == "global"
    else f"{SEARCH_APP_LOCATION}-discoveryengine.googleapis.com"
)

# Resource name of the serving config; built once and reused for both the
# URL and the request body so the two cannot drift apart.
serving_config = (
    f"projects/{PROJECT_ID}/locations/{SEARCH_APP_LOCATION}"
    f"/collections/default_collection/engines/{SEARCH_ENGINE_ID}"
    "/servingConfigs/default_search"
)

# Construct the API endpoint URL
url = f"https://{api_host}/v1beta/{serving_config}:search"

# Headers for the request
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

# Data payload for the POST request
data = {
    "servingConfig": serving_config,
    "query": "How many 10-ks in the datastore ",  # <- insert your search prompt/query
    "pageSize": 10,  # integer, as declared by the SearchRequest schema
}

# Send the POST request. The timeout keeps the cell from hanging forever if
# the service is unreachable.
response = requests.post(url, headers=headers, json=data, timeout=60)

# Check for successful response
if response.status_code == 200:
    output = response.text
    print(output)
else:
    print(f"Request failed with status code: {response.status_code}")
    # Uncomment below to see the error body
    # print(response.text)

# Store the parsed response in a variable called 'response_data'.
# Error responses are not guaranteed to be JSON (e.g. a proxy HTML page), so
# fall back to an empty dict instead of crashing the cell.
try:
    response_data = response.json()
except ValueError:
    print("Response body is not valid JSON; response_data set to {}")
    response_data = {}


In [20]:
#Uncomment to extract snippets from search results

# for idx, result in enumerate(response_data['results']):
#     document = result['document']
#     if 'derivedStructData' in document:
#         print(f"\n--- Snippets from Document {idx+1} ---")
#         for snippet_item in document['derivedStructData'].get('snippets', []):
#             print(snippet_item['snippet'])
            


#Uncomment to strip HTML tags (e.g. <b>...</b>) from the snippets using a regex

# snippets_list = []

# for idx, result in enumerate(response_data['results']):
#     # ... (your existing code)
#         for snippet_item in document['derivedStructData'].get('snippets', []):
#             snippets_list.append(snippet_item['snippet'])
# for item in snippets_list:
#     clean_text = re.sub('<[^>]*>', '', item)  
#     print(clean_text)
#     print("\n") 

### Feed the Search result snippets to the Gemini Pro model and formulate a summary/response based on your original prompt

In [23]:
def generate():
    """Stream a Gemini response that organizes the search results.

    Reads the module-level ``response_data`` (parsed search response) and
    ``data['query']`` (the original search query), builds a single prompt
    from them, and prints the streamed model output to stdout as it arrives.

    Returns:
        None. The generated text is printed, not returned.
    """
    vertexai.init(project=PROJECT_ID, location=LOCATION_GEMINI_MODEl)
    model = GenerativeModel("gemini-1.0-pro-001")  # specify the gemini model version

    # Serialize as real JSON: str() of a dict yields a Python repr (single
    # quotes, True/None) even though the prompt tells the model it is JSON.
    prompt = (
        json.dumps(response_data, ensure_ascii=False)
        + " organize the json results based on the question :"
        + data["query"]
    )

    block_threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    harm = generative_models.HarmCategory

    responses = model.generate_content(
        prompt,
        generation_config={
            "max_output_tokens": 2048,
            "temperature": 0.9,
            "top_p": 1,
        },
        safety_settings={
            harm.HARM_CATEGORY_HATE_SPEECH: block_threshold,
            harm.HARM_CATEGORY_DANGEROUS_CONTENT: block_threshold,
            harm.HARM_CATEGORY_SEXUALLY_EXPLICIT: block_threshold,
            harm.HARM_CATEGORY_HARASSMENT: block_threshold,
        },
        stream=True,
    )

    for chunk in responses:
        try:
            chunk_text = chunk.text
        except ValueError:
            # NOTE(review): the SDK raises ValueError from `.text` when a
            # chunk carries no text (e.g. blocked by a safety filter) --
            # confirm against the installed SDK version. Skip such chunks so
            # the text already streamed is not followed by a traceback.
            continue
        print(chunk_text, end="")


generate()

There are 4 10-ks in the datastore.