In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Question Answering with Large Documents using LlamaIndex on Google Cloud
LlamaIndex is a powerful framework that simplifies the process of building RAG (Retrieval Augmented Generation) applications, especially for question answering with large documents. LlamaIndex simplifies the development process, while Vertex AI provides access to high-performing LLMs and scalable infrastructure. This combination enables you to create sophisticated AI applications that can effectively leverage external knowledge sources to provide accurate and informative responses.

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/orchestration/intro_to_llamaindex_rag.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/orchestration/intro_to_llamaindex_rag.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/orchestration/intro_to_llamaindex_rag.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>

| | |
|-|-|
|Author(s) | [Mona Mona](https://github.com/mona19) |

### Objective

In this tutorial, you learn how to use LlamaIndex with Vertex AI to:

- Deploy Vertex AI vector search index
- Load document for question answering
- Chunk and embed documents using Vertex AI embeddings into Vector Search index.
- Use Gemini model to ask questions to the index
- Evaluate the faithfulness of the response
- Clean up: delete the Vertex AI Vector Search index and index endpoint

### Costs

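This tutorial uses billable components of Google Cloud:

- Vertex AI (Vector Search, text embeddings, and Gemini)
- Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing)
and [Cloud Storage pricing](https://cloud.google.com/storage/pricing),
and use the [Pricing Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.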

## Getting Started

### Install Vertex AI SDK for LlamaIndex, other packages and their dependencies

Install the following packages required to execute this notebook.

In [None]:
! pip install llama-index llama-index-vector-stores-vertexaivectorsearch llama-index-llms-vertex

### Colab only: Restart the kernel

***Colab only***: Run the following cell to restart the kernel, or use the button to restart the kernel. For Vertex AI Workbench, you can restart the kernel using the button on top.

In [None]:
# Restart the kernel after the installs so the environment picks up the new packages
import IPython

# do_shutdown(True) asks the running kernel to shut down with restart enabled.
IPython.Application.instance().kernel.do_shutdown(True)

### Authenticating your notebook environment

- If you are using **Colab** to run this notebook, run the cell below and continue.
- If you are using **Vertex AI Workbench**, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env).

In [None]:
import sys

# The google.colab module is only loaded when the notebook runs inside Colab.
is_colab = "google.colab" in sys.modules

if is_colab:
    from google.colab import auth

    # Interactive sign-in for the Colab runtime.
    auth.authenticate_user()

- If you are running this notebook in a local development environment:
  - Install the [Google Cloud SDK](https://cloud.google.com/sdk).
  - Obtain authentication credentials. Create local credentials by running the following command and following the oauth2 flow (read more about the command [here](https://cloud.google.com/sdk/gcloud/reference/beta/auth/application-default/login)):

    ```bash
    gcloud auth application-default login
    ```

### Import libraries

We recommend that you follow the notebook code and instructions below to set up your environment:
install LlamaIndex, authenticate your notebook, and import the necessary libraries using the instructions provided in the notebook.

Then create a Vertex AI Vector Search index and deploy it to an endpoint. Vertex AI Vector Search is a fully managed, scalable Google Cloud service designed for high-speed similarity searches across large datasets of high-dimensional vectors, crucial for various AI applications like recommendation systems and semantic search. This step can take up to 30 minutes. You should complete both of these tasks before moving to the next step.


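Set your project ID, region, and Cloud Storage bucket name in the following cell, then run it to initialize the Vertex AI SDK. The constants defined in this cell are used throughout the rest of the notebook, so run it in every environment (Colab and Vertex AI Workbench).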

In [None]:
# Project and Storage Constants
PROJECT_ID = "<>"
REGION = "us-central1"
GCS_BUCKET_NAME = "your bucket name"
# Derived from GCS_BUCKET_NAME so the bucket name and its URI cannot drift apart.
GCS_BUCKET_URI = f"gs://{GCS_BUCKET_NAME}"

# The number of dimensions for the textembedding-gecko@003 is 768
# If other embedder is used, the dimensions would probably need to change.
VS_DIMENSIONS = 768

# Vertex AI Vector Search Index configuration
# parameter description here
# https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.MatchingEngineIndex#google_cloud_aiplatform_MatchingEngineIndex_create_tree_ah_index
VS_INDEX_NAME = "llamaindex-doc-index"  # @param {type:"string"}
VS_INDEX_ENDPOINT_NAME = "llamaindex-doc-endpoint"


from google.cloud import aiplatform

# Initialize the Vertex AI SDK with the project and region configured above.
aiplatform.init(project=PROJECT_ID, location=REGION)

In [None]:
# Create a bucket.
! gsutil mb -l REGION−pREGION−pREGION -p PROJECT_ID $GCS_BUCKET_URI

In [None]:
# NOTE: This operation can take up to 30 seconds

# Resource names of existing indexes whose display name matches VS_INDEX_NAME
index_names = [
    existing_index.resource_name
    for existing_index in aiplatform.MatchingEngineIndex.list(
        filter=f"display_name={VS_INDEX_NAME}"
    )
]

if index_names:
    # Reuse the first matching index instead of creating another one
    vs_index = aiplatform.MatchingEngineIndex(index_name=index_names[0])
    print(
        f"Vector Search index {vs_index.display_name} exists with resource name {vs_index.resource_name}"
    )
else:
    print(f"Creating Vector Search index {VS_INDEX_NAME} ...")
    vs_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
        display_name=VS_INDEX_NAME,
        dimensions=VS_DIMENSIONS,
        distance_measure_type="DOT_PRODUCT_DISTANCE",
        shard_size="SHARD_SIZE_SMALL",
        index_update_method="STREAM_UPDATE",  # allowed values BATCH_UPDATE , STREAM_UPDATE
        approximate_neighbors_count=100,
    )
    print(
        f"Vector Search index {vs_index.display_name} created with resource name {vs_index.resource_name}"
    )

### Create a Vertex AI Vector Search Endpoint
To use the index, you need to create an index endpoint. It works as a server instance accepting query requests for your index. An endpoint can be a public endpoint or a private endpoint.

Let's create a public endpoint.

In [None]:
# Resource names of existing index endpoints whose display name matches VS_INDEX_ENDPOINT_NAME
endpoint_names = [
    existing_endpoint.resource_name
    for existing_endpoint in aiplatform.MatchingEngineIndexEndpoint.list(
        filter=f"display_name={VS_INDEX_ENDPOINT_NAME}"
    )
]

if endpoint_names:
    # Reuse the first matching endpoint instead of creating another one
    vs_endpoint = aiplatform.MatchingEngineIndexEndpoint(
        index_endpoint_name=endpoint_names[0]
    )
    print(
        f"Vector Search index endpoint {vs_endpoint.display_name} exists with resource name {vs_endpoint.resource_name}"
    )
else:
    print(f"Creating Vector Search index endpoint {VS_INDEX_ENDPOINT_NAME} ...")
    # Public endpoint, as described in the text above this cell
    vs_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
        display_name=VS_INDEX_ENDPOINT_NAME, public_endpoint_enabled=True
    )
    print(
        f"Vector Search index endpoint {vs_endpoint.display_name} created with resource name {vs_endpoint.resource_name}"
    )

**Deploy Index to the Endpoint**

With the index endpoint, deploy the index by specifying a unique deployed index ID.

NOTE: This operation can take up to 30 minutes.

In [None]:
# Collect (endpoint, deployed index ID) pairs for every existing deployment of the index
index_endpoints = [
    (deployment.index_endpoint, deployment.deployed_index_id)
    for deployment in vs_index.deployed_indexes
]

if index_endpoints:
    # Already deployed: attach to the endpoint of the first deployment found
    vs_deployed_index = aiplatform.MatchingEngineIndexEndpoint(
        index_endpoint_name=index_endpoints[0][0]
    )
    print(
        f"Vector Search index {vs_index.display_name} is already deployed at endpoint {vs_deployed_index.display_name}"
    )
else:
    print(
        f"Deploying Vector Search index {vs_index.display_name} at endpoint {vs_endpoint.display_name} ..."
    )
    # Single-replica deployment (min and max replica count are both 1)
    vs_deployed_index = vs_endpoint.deploy_index(
        index=vs_index,
        deployed_index_id="new_deployed_index_id",
        display_name=VS_INDEX_NAME,
        machine_type="e2-standard-16",
        min_replica_count=1,
        max_replica_count=1,
    )
    print(
        f"Vector Search index {vs_index.display_name} is deployed at endpoint {vs_deployed_index.display_name}"
    )

In [None]:
# import modules needed
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.embeddings.vertex import VertexTextEmbedding
from llama_index.llms.vertex import Vertex
from llama_index.vector_stores.vertexaivectorsearch import VertexAIVectorStore

**Parse, Index and Query PDFs using Vertex AI Vector Search and Gemini Pro**

In [None]:
! mkdir -p ./data/arxiv/
! wget 'https://arxiv.org/pdf/1706.03762.pdf' -O ./data/arxiv/test.pdf

### Ingesting documents
In this step, we will be using the LlamaIndex utility class called SimpleDirectoryReader. It can easily load and parse various file types from a local directory. It automatically handles different formats, extracts metadata, and can read directories recursively.

The following code creates a directory structure "./data/arxiv/" using the mkdir command with the -p flag to ensure all parent directories are created. It then downloads the PDF of the "Attention Is All You Need" paper from arXiv using the wget command, saving it as "test.pdf" in the new directory. Next, we use SimpleDirectoryReader from LlamaIndex to read the contents of the "./data/arxiv/" directory and load the documents. Finally, it prints the number of documents loaded. Note that the PDF reader typically produces one document per page (each carries a `page_label` in its metadata), so expect this number to match the page count of the paper rather than 1.

In [None]:
# load documents
documents = SimpleDirectoryReader("./data/arxiv/").load_data()
print(f"# of documents = {len(documents)}")

! mkdir -p ./data/arxiv/
! wget 'https://arxiv.org/pdf/1706.03762.pdf' -O ./data/arxiv/test.pdf

### Import models and initialize Vector Store

You load the pre-trained text generation and text embedding models, and connect the vector store to the Vector Search index and endpoint.

In [None]:
# Vector store pointing at the Vector Search index, its endpoint, and the Cloud Storage bucket
vector_store = VertexAIVectorStore(
    gcs_bucket_name=GCS_BUCKET_NAME,
    endpoint_id=vs_endpoint.resource_name,
    index_id=vs_index.resource_name,
    region=REGION,
    project_id=PROJECT_ID,
)

# Storage context that routes index storage to the vector store above
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Embedding model; VS_DIMENSIONS in the configuration cell is set for this model
embed_model = VertexTextEmbedding(
    location=REGION,
    project=PROJECT_ID,
    model_name="textembedding-gecko@003",
)

# Gemini LLM with temperature 0
vertex_gemini = Vertex(additional_kwargs={}, temperature=0, model="gemini-pro")

### Create embeddings and store them in Vertex AI Vector Search

We are using VectorStoreIndex.from_documents() from LlamaIndex which creates a vector index from the given documents, using the specified storage context from Vertex AI vector search and Vertex AI embedding model created in the previous step.


In [None]:
# Build the vector index from the loaded documents, using the Vertex AI
# embedding model and the Vector Search-backed storage context
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model,
    storage_context=storage_context,
)

### Using LlamaIndex query engine to query

Now we construct a query_engine object from your LlamaIndex index. The query_engine is responsible for processing user queries, retrieving relevant information from the index, and generating responses using the specified LLM, which is Vertex AI Gemini.

In [None]:
# Pass the Gemini LLM explicitly: without `llm=`, LlamaIndex falls back to its
# globally configured default LLM, which is OpenAI unless overridden and needs an OpenAI API key.
query_engine = index.as_query_engine(llm=vertex_gemini)

**Set up Query engine with Gemini**

In [None]:
# Query engine that answers with Gemini and retrieves with similarity_top_k=3
llm = vertex_gemini
query_engine = index.as_query_engine(similarity_top_k=3, llm=llm)

In [None]:
response = query_engine.query("who are the authors of paper Attention is All you need?")

divider = "-" * 80

# Answer text, framed by divider lines
print("Response:", divider, response.response, divider, sep="\n")

# One entry per retrieved source node: text preview, score, and file metadata
print("Source Documents:", divider, sep="\n")
for node in response.source_nodes:
    node_meta = node.metadata
    print(f"Sample Text: {node.text[:50]}")
    print(f"Relevance score: {node.get_score():.3f}")
    print(f"File Name: {node_meta.get('file_name')}")
    print(f"Page #: {node_meta.get('page_label')}")
    print(f"File Path: {node_meta.get('file_path')}")
    print(divider)

**Response Evaluation**

Does the response match the retrieved context? Does it also match the query? Does it match the reference answer or guidelines? Here's a simple example that evaluates a single response for Faithfulness, i.e. whether the response is aligned to the context, such as being free from hallucinations:

In [None]:
from llama_index.core.evaluation import FaithfulnessEvaluator

In [None]:
# Define the faithfulness evaluator, backed by the same LLM (`llm`) that the query engine uses
evaluator = FaithfulnessEvaluator(llm=llm)

In [None]:
# query index
!pip install nest_asyncio

The response contains both the response and the source from which the response was generated; the evaluator compares them and determines if the response is faithful to the source.

In [None]:
import nest_asyncio

# Patch asyncio to allow nested event loops
# NOTE(review): presumably needed because the evaluator runs async code inside the notebook's loop — confirm
nest_asyncio.apply()

# Re-run the query, then check whether the answer is supported by its retrieved sources
response = query_engine.query("who are the authors of paper Attention is All you need?")
eval_result = evaluator.evaluate_response(response=response)
print(eval_result.passing)

**Clean Up**

Please delete Vertex AI Vector Search Index and Index Endpoint after running your experiments to avoid incurring additional charges. Please note that you will be charged as long as the endpoint is running.

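⚠️ NOTE: Enabling the `CLEANUP_RESOURCES` flag deletes the Vector Search Index and Index Endpoint. The contents of the Cloud Storage bucket are deleted only if you also uncomment the `gsutil rm` line in the last cell. Please run it with caution.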

In [None]:
# Gate for the cleanup cells below: when True they undeploy and delete the Vector Search
# index endpoint and index. Set to False to keep the resources (they remain billable).
CLEANUP_RESOURCES = True

Undeploy all indexes and delete the index endpoint

In [None]:
if CLEANUP_RESOURCES:
    endpoint_label = vs_endpoint.display_name
    print(f"Undeploying all indexes and deleting the index endpoint {endpoint_label}")
    # Undeploy every index from the endpoint first, then delete the endpoint itself
    vs_endpoint.undeploy_all()
    vs_endpoint.delete()

In [None]:
if CLEANUP_RESOURCES:
    index_label = vs_index.display_name
    print(f"Deleting the index {index_label}")
    # Delete the Vector Search index
    vs_index.delete()

In [None]:
if CLEANUP_RESOURCES and "GCS_BUCKET_NAME" in globals():
    print(f"Deleting contents from the Cloud Storage bucket {GCS_BUCKET_NAME}")

    shell_output = ! gsutil du -ash gs://GCS_BUCKET_NAME    print(shell_output)    print(        f"Size of the bucket {GCS_BUCKET_NAME} before deleting = {' '.join(shell_output[0].split()[:2])}"    )    # uncomment below line to delete contents of the bucket    # ! gsutil -m rm -r gs://GCS_BUCKET_NAME    print(shell_output)    print(        f"Size of the bucket {GCS_BUCKET_NAME} before deleting = {' '.join(shell_output[0].split()[:2])}"    )    # uncomment below line to delete contents of the bucket    # ! gsutil -m rm -r gs://GCS_BUCKET_NAME
    print(shell_output)
    print(
        f"Size of the bucket {GCS_BUCKET_NAME} before deleting = {' '.join(shell_output[0].split()[:2])}"
    )

    # uncomment below line to delete contents of the bucket
    # ! gsutil -m rm -r gs://GCS_BUCKET_NAME