In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Search Data Blending with Gemini Summarization

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/ShantamGupta/gcp_vertex_search_data_blending/blob/main/vertex_search_data_blending_with_gemini_summarization.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FShantamGupta%2Fgcp_vertex_search_data_blending%2Fmain%2Fvertex_search_data_blending_with_gemini_summarization.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Run in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://github.com/ShantamGupta/gcp_vertex_search_data_blending/blob/main/vertex_search_data_blending_with_gemini_summarization.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/ShantamGupta/gcp_vertex_search_data_blending/main/vertex_search_data_blending_with_gemini_summarization.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>


## Overview

### Search
Vertex AI Search brings together the power of deep information retrieval, state-of-the-art natural language processing, and the latest in large language model (LLM) processing to understand user intent and return the most relevant results for the user.

With Vertex AI Search, you can create apps for searching and for making recommendations. Vertex AI Search also has special capabilities for some industries, such as media, healthcare, and retail.



### Gemini

Gemini is a family of generative AI models developed by Google DeepMind that is designed for multimodal use cases. The Gemini API gives you access to the Gemini Pro and Gemini Pro Vision models.

### Vertex AI Gemini API

The Vertex AI Gemini API provides a unified interface for interacting with Gemini models. This tutorial uses the following Gemini 1.0 Pro model:

- **Gemini 1.0 Pro model** (`gemini-1.0-pro`): Designed to handle natural language tasks, multi-turn text and code chat, and code generation.

You can interact with the Gemini API using the following methods:

- Use [Vertex AI Studio](https://cloud.google.com/generative-ai-studio) for quick testing and command generation
- Use cURL commands
- Use the Vertex AI SDK

For more information, see the [Generative AI on Vertex AI](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) documentation.

This tutorial explains how to query a search app that blends multiple data stores, retrieve the search snippets, and summarize the response using Gemini Pro.


### Create a Search App with Mixed Datastores

1. Follow the steps listed here to create a Search App: https://cloud.google.com/generative-ai-app-builder/docs/create-engine-es
2. Create the relevant data stores (GCS, BQ, Website): https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es
3. Link the data stores to the Search App: https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores

### Install the Relevant packages

In [None]:
!pip install --upgrade google-cloud-aiplatform

### Restart current runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which will restart the current kernel.

In [None]:
# The packages installed above only become importable after the kernel restarts.
import time
import IPython

# Look up the running IPython application and ask its kernel to shut down
# with restart enabled.
IPython.Application.instance().kernel.do_shutdown(True)

### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, run the following cell to authenticate your environment. This step is not required if you are using [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench).


In [None]:
import sys

# Google Colab needs an extra authentication step; the presence of the
# `google.colab` module tells us the notebook is running there.
running_in_colab = "google.colab" in sys.modules

if running_in_colab:
    from google.colab import auth

    # Authenticate the current user to Google Cloud
    auth.authenticate_user()

### Define Google Cloud project information

In [None]:
# Replace the placeholder values below with your own before running the rest of the notebook.
PROJECT_ID = "PROJECT_ID"  # Google Cloud project ID
SEARCH_APP_LOCATION = "global"  # location of the search app, e.g. "global" or "us"
SEARCH_ENGINE_ID = "VERTEX_SEARCH_ENGINE_ID"  # ID of the Vertex AI Search app (engine)
LOCATION_GEMINI_MODEL = "northamerica-northeast1"  # region used for the Gemini model

# Backward-compatible alias: the constant was originally spelled with a trailing
# lowercase "l", and other cells may still refer to it by that name.
LOCATION_GEMINI_MODEl = LOCATION_GEMINI_MODEL

### Import the Relevant packages

In [None]:
#import the relevant library

from typing import List
import vertexai
from vertexai.preview.generative_models import GenerativeModel
import vertexai.preview.generative_models as generative_models
import requests
import subprocess  # To obtain the access token
import re
import json

### Send a Request to the Vertex AI Search App with Data Blending (Mixed Datastores)
Reference: https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores

In [None]:
# Obtain an access token from the gcloud CLI
access_token = (
    subprocess.check_output(["gcloud", "auth", "print-access-token"])
    .decode("utf-8")
    .strip()
)

# Apps in a multi-region such as "us" or "eu" are served from a location-prefixed
# host; only "global" apps use the un-prefixed host.
api_host = (
    "discoveryengine.googleapis.com"
    if SEARCH_APP_LOCATION == "global"
    else f"{SEARCH_APP_LOCATION}-discoveryengine.googleapis.com"
)

# Resource name of the serving config; used by both the URL and the request body
serving_config = (
    f"projects/{PROJECT_ID}/locations/{SEARCH_APP_LOCATION}"
    f"/collections/default_collection/engines/{SEARCH_ENGINE_ID}"
    "/servingConfigs/default_search"
)

# Construct the API endpoint URL
url = f"https://{api_host}/v1beta/{serving_config}:search"

# Headers for the request
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json",
}

# Data payload for the POST request
data = {
    "servingConfig": serving_config,
    "query": "How many 10-ks are there in the datastore ",  # <- insert your search prompt/query
    "pageSize": 10,
}

# Send the POST request; the timeout keeps the cell from hanging indefinitely
# if the service never answers.
response = requests.post(url, headers=headers, json=data, timeout=60)

# Stop here on failure so the cells below never try to process an error payload.
if response.status_code != 200:
    raise RuntimeError(
        f"Request failed with status code: {response.status_code}\n{response.text}"
    )

output = response.text
print(output)

# Store the parsed response in a variable called 'response_data'
response_data = response.json()


In [None]:
#Uncomment to extract snippets from search results

# for idx, result in enumerate(response_data['results']):
#     document = result['document']
#     if 'derivedStructData' in document:
#         print(f"\n--- Snippets from Document {idx+1} ---")
#         for snippet_item in document['derivedStructData'].get('snippets', []):
#             print(snippet_item['snippet'])
            


# Uncomment to strip HTML tags from the snippets using a regex

# snippets_list = []

# for idx, result in enumerate(response_data['results']):
#     # ... (your existing code)
#         for snippet_item in document['derivedStructData'].get('snippets', []):
#             snippets_list.append(snippet_item['snippet'])
# for item in snippets_list:
#     clean_text = re.sub('<[^>]*>', '', item)  
#     print(clean_text)
#     print("\n") 

### Feed the search results to the Gemini Pro model and formulate a summary/response based on your original prompt

In [None]:
def generate():
    """Ask Gemini to organize the search results around the original query.

    Reads the notebook-level ``response_data`` (the parsed search response) and
    ``data['query']`` (the search query), sends both to the Gemini model as a
    single prompt, and prints the streamed answer as it arrives.

    Returns:
        None. The model output is written to stdout.
    """
    vertexai.init(project=PROJECT_ID, location=LOCATION_GEMINI_MODEl)
    model = GenerativeModel("gemini-1.0-pro-001")  # specify the gemini model version

    # The prompt refers to "json results", so serialize the parsed response as
    # real JSON instead of relying on the Python repr produced by str().
    prompt = (
        json.dumps(response_data, ensure_ascii=False)
        + " organize the json results based on the question :"
        + data["query"]
    )

    responses = model.generate_content(
        prompt,
        generation_config={
            "max_output_tokens": 2048,
            "temperature": 0.9,
            "top_p": 1,
        },
        safety_settings={
            generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
            generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        },
        stream=True,
    )

    for response in responses:
        try:
            print(response.text, end="")
        except ValueError:
            # NOTE(review): recent SDK versions appear to raise ValueError from
            # `.text` when a chunk carries no text (for example when it was
            # blocked by the safety settings) - confirm against the installed
            # SDK version. Report the gap and keep streaming the rest.
            print("\n[response chunk without text skipped]")


generate()