In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Model Garden RAG API

Last updated: 9/11/2024

## Onboarding
If you have any questions about the onboarding process, please reach out to the *Vertex RAG API* team at vertex-rag-eng@google.com.

## 0. Set up the Environment and Test Project


In [None]:
# Install the Vertex AI SDK; the version specifier keeps numpy below 2.0.0.
!pip3 install --force-reinstall google-cloud-aiplatform "numpy<2.0.0" --user

In [None]:
from google.colab import auth

# Authenticate the Colab user (this cell assumes a Colab runtime, since it imports google.colab).
auth.authenticate_user()

# Install gcloud
# NOTE(review): this installs the `google-cloud` PyPI package, which does not appear to be
# the gcloud CLI — confirm whether this line is still needed.
!pip install google-cloud

**Remember to restart after pip install.**

In [None]:
import sys

# Colab only: restart the kernel so the packages installed above are picked up.
# Outside Colab this cell is a no-op.
if "google.colab" in sys.modules:
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

## Initialization


In [None]:
import vertexai
from vertexai.preview import rag
from vertexai.preview.generative_models import GenerativeModel, Tool

In [None]:
# Set Project
# Google Cloud project ID used by the cells below; replace the placeholder before running them.
PROJECT_ID = "your-project-id"  # @param {type:"string"}

In [None]:
# Initialize the Vertex AI SDK for the chosen project; the region is fixed to us-central1.
vertexai.init(
    project=PROJECT_ID,
    location="us-central1",
)

## Create a RAG corpus


In [None]:
# --- Embedding model: pick ONE of the two options below ---
# NOTE: both options assign `embedding_model_config`. As written, Option B runs
# last and is therefore the configuration passed to `rag.create_corpus`.
# Delete or comment out the option you do not want.

# Option A: configure a Google first-party embedding model
embedding_model_config = rag.EmbeddingModelConfig(
    publisher_model="publishers/google/models/text-embedding-004"
)

# Option B: configure a third-party model or a Google fine-tuned first-party model
# as a Vertex Endpoint resource.
# See https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_e5.ipynb
# for deploying 3P embedding models to endpoints.
ENDPOINT_ID = "your-model-endpoint-id"  # @param {type:"string"}
# f-string: without the prefix the literal "{PROJECT_ID}" / "{ENDPOINT_ID}" text
# would be sent as the endpoint resource name.
MODEL_ENDPOINT = f"projects/{PROJECT_ID}/locations/us-central1/endpoints/{ENDPOINT_ID}"

embedding_model_config = rag.EmbeddingModelConfig(
    endpoint=MODEL_ENDPOINT,
)

# Configure a Weaviate Vector Database Instance for the corpus
WEAVIATE_HTTP_ENDPOINT = "weaviate-http-endpoint"  # @param {type:"string"}
COLLECTION_NAME = "weaviate-collection-name"  # @param {type:"string"}
# Judging by the placeholder, this is a Secret Manager resource name rather than
# the raw API key — confirm against the RAG API reference.
API_KEY = "your-secret-manager-resource-name"  # @param {type:"string"}

vector_db = rag.Weaviate(
    weaviate_http_endpoint=WEAVIATE_HTTP_ENDPOINT,
    collection_name=COLLECTION_NAME,
    api_key=API_KEY,
)


# Name your corpus
DISPLAY_NAME = "your-corpus-name"  # @param {type:"string"}

# Create the corpus backed by the embedding model and vector database chosen above.
rag_corpus = rag.create_corpus(
    display_name=DISPLAY_NAME,
    embedding_model_config=embedding_model_config,
    vector_db=vector_db,
)

In [None]:
# Check the corpus just created; the returned listing is shown as the cell output.
rag.list_corpora()

## Upload a file to the corpus

In [None]:
%%writefile test.txt

Here's a demo for Llama3 RAG

In [None]:
# Upload the local test.txt (written by the previous cell) into the corpus
# and keep the returned object in rag_file.
rag_file = rag.upload_file(
    corpus_name=rag_corpus.name,
    path="test.txt",
    display_name="test.txt",
    description="my test",
)

## Import files from Google Cloud Storage
Remember to grant "Viewer" access to the "Vertex RAG Data Service Agent" (with the format of `service-{project_number}@gcp-sa-vertex-rag.iam.gserviceaccount.com`) for your Google Cloud Storage bucket.

In [None]:
# Cloud Storage location to import from; replace the placeholder with your own bucket URI.
GS_BUCKET = "gs://your-gs-bucket"  # @param {type:"string"}

# Top-level `await` is used directly in the cell (hence the noqa for F704).
# chunk_size / chunk_overlap units are not shown here — presumably tokens, TODO confirm.
response = await rag.import_files_async(  # noqa: F704
    corpus_name=rag_corpus.name,
    paths=[GS_BUCKET],
    chunk_size=512,
    chunk_overlap=50,
)

In [None]:
# Check the files just imported. It may take a few seconds to process the imported files,
# so re-run this cell if the listing looks incomplete.
list(rag.list_files(corpus_name=rag_corpus.name))

## Import files from Google Drive
Eligible paths can be https://drive.google.com/drive/folders/{folder_id} or https://drive.google.com/file/d/{file_id}.

Remember to grant "Viewer" access to the "Vertex RAG Data Service Agent" (with the format of `service-{project_number}@gcp-sa-vertex-rag.iam.gserviceaccount.com`) for your Drive folder/files.

In [None]:
# Drive file ID to import; FILE_PATH expands it into the single-file URL form
# (https://drive.google.com/file/d/{file_id}).
FILE_ID = "your-file-id"  # @param {type:"string"}
FILE_PATH = f"https://drive.google.com/file/d/{FILE_ID}"

In [None]:
# Import the Drive file using the non-async variant of the import call;
# its return value is displayed as the cell output.
rag.import_files(
    corpus_name=rag_corpus.name,
    paths=[FILE_PATH],
    chunk_size=1024,
    chunk_overlap=100,
)

In [None]:
# Check the files just imported. It may take a few seconds to process the imported files,
# so re-run this cell if the Drive file is not listed yet.
list(rag.list_files(corpus_name=rag_corpus.name))

## Import files from Slack

In [None]:
# Slack channel to import from, plus (judging by the placeholder) the Secret Manager
# resource name that holds the Slack API key.
CHANNEL_ID = "your-slack-channel-id"  # @param {type:"string"}
API_KEY_SECRET_VERSION = "your-secret-manager-resource-name"  # @param {type:"string"}

In [None]:
# One channel, identified by its ID and the secret that grants access to it.
slack_channel = rag.SlackChannel(CHANNEL_ID, API_KEY_SECRET_VERSION)

# Wrap the channel in the source object passed to the import call.
slack_source = rag.SlackChannelsSource(channels=[slack_channel])

In [None]:
# Import from the Slack source built above. Top-level `await` is used directly in the
# cell (hence the noqa for F704); the result overwrites the earlier `response`.
response = await rag.import_files_async(  # noqa: F704
    corpus_name=rag_corpus.name,
    source=slack_source,
    chunk_size=1024,
    chunk_overlap=200,
)

In [None]:
# Check the files just imported. It may take a few seconds to process the imported files,
# so re-run this cell if the Slack content is not listed yet.
list(rag.list_files(corpus_name=rag_corpus.name))

## Import files from Jira

In [None]:
# Jira connection and selection parameters; replace every placeholder before running.
EMAIL = "your-email"  # @param {type:"string"}
SERVER_URI = "your-server.atlassian.net"  # @param {type:"string"}
PROJECT = "your-project-name"  # @param {type:"string"}
CUSTOM_QUERY = "your-custom-jql-query"  # @param {type:"string"}
# NOTE: this reuses (and overwrites) the variable name set in the Slack section.
API_KEY_SECRET_VERSION = "your-secret-manager-resource-name"  # @param {type:"string"}

In [None]:
# Describe what to pull from Jira: first the server and account used to connect,
# then the project and JQL query that select the issues.
jira_query = rag.JiraQuery(
    server_uri=SERVER_URI,
    email=EMAIL,
    api_key=API_KEY_SECRET_VERSION,
    jira_projects=[PROJECT],
    custom_queries=[CUSTOM_QUERY],
)

# Wrap the single query in the source object passed to the import call.
jira_source = rag.JiraSource(queries=[jira_query])

In [None]:
# Import from the Jira source built above. Top-level `await` is used directly in the
# cell (hence the noqa for F704); the result overwrites the earlier `response`.
response = await rag.import_files_async(  # noqa: F704
    corpus_name=rag_corpus.name,
    source=jira_source,
    chunk_size=1024,
    chunk_overlap=200,
)

In [None]:
# Check the files just imported. It may take a few seconds to process the imported files,
# so re-run this cell if the Jira content is not listed yet.
list(rag.list_files(corpus_name=rag_corpus.name))

## Generate Content with RAG Retrieval Tool for self-deployed Llama3 model

When the similarity distance between the query and a retrieved context is below `vector_distance_threshold`, the generated content will cite that retrieved context (from RagStore).


In [None]:
# Scope retrieval to the whole corpus. To restrict it to specific files,
# get their ids manually from rag.list_files and pass them as rag_file_ids.
rag_resource = rag.RagResource(
    rag_corpus=rag_corpus.name,
    # rag_file_ids=[],
)

# Build the tool from the inside out: store -> retrieval -> tool.
rag_store = rag.VertexRagStore(
    rag_resources=[rag_resource],  # Currently only 1 corpus is allowed.
    similarity_top_k=5,
    vector_distance_threshold=0.4,
)
rag_retrieval = rag.Retrieval(source=rag_store)
rag_retrieval_tool = Tool.from_retrieval(retrieval=rag_retrieval)

In [None]:
# Endpoint ID of the self-deployed Llama3 model. Kept separate from ENDPOINT_ID,
# which the corpus-creation cell uses for the embedding-model endpoint.
LLM_ENDPOINT_ID = "your-llama3-endpoint-id"  # @param {type:"string"}
# f-string: without the prefix the literal "{PROJECT_ID}" / "{ENDPOINT_ID}" text
# was passed to GenerativeModel instead of a real resource name.
ENDPOINT = f"projects/{PROJECT_ID}/locations/us-central1/endpoints/{LLM_ENDPOINT_ID}"

# Model handle for the deployed endpoint, with the RAG retrieval tool attached.
rag_model = GenerativeModel(ENDPOINT, tools=[rag_retrieval_tool])

In [None]:
# Question sent to the tool-enabled model; the result is kept in `response` for the next cell.
QUERY = "What is RAG and why it is helpful?"  # @param {type:"string"}

response = rag_model.generate_content(QUERY)

In [None]:
# Display the generation response object returned by the previous cell.
response

## Generate Content with RAG Retrieval Tool for non-self-deployed Llama3 model endpoint

The retrieved contexts can be passed to any SDK or model generation API to generate final results.


In [None]:
QUERY = "What is RAG and why it is helpful?"  # @param {type:"string"}

# Scope retrieval to the whole corpus. To restrict it to specific files,
# get their ids manually from rag.list_files and pass them as rag_file_ids.
rag_resource = rag.RagResource(
    rag_corpus=rag_corpus.name,
    # rag_file_ids=[],
)

# Query the corpus directly (no model involved) with the same top-k and
# distance threshold used for the retrieval tool.
response = rag.retrieval_query(
    rag_resources=[rag_resource],  # Currently only 1 corpus is allowed.
    text=QUERY,
    similarity_top_k=5,
    vector_distance_threshold=0.4,
)

# The retrieved context can be passed to any SDK or model generation API to generate final results.
# Newlines are replaced with a space (not deleted) so that the words on either
# side of a line break are not fused together in the flattened context.
retrieved_context = " ".join(
    context.text.replace("\n", " ") for context in response.contexts.contexts
)

In [None]:
# Display the flattened context string built in the previous cell.
retrieved_context

## API reference

For more details on RAG corpus/file management and detailed support please visit https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/rag-api
