In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Question Answering with Documents using LangChain 🦜️🔗 and BigQuery Vector Search

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/language/use-cases/document-qa/question_answering_documents_langchain_bigquery_vector_search.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/language/use-cases/document-qa/question_answering_documents_langchain_bigquery_vector_search.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/blob/main/language/use-cases/document-qa/question_answering_documents_langchain_bigquery_vector_search.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>

| | |
|-|-|
|Author(s) | [Ashley Xu](https://github.com/ashleyxuu) |

## Overview

This notebook demonstrates implementing a Question Answering (QA) system to show how to improve an LLM's responses by augmenting the LLM's knowledge with external data sources such as documents. The notebook uses the Vertex AI PaLM API for [Text](https://cloud.google.com/vertex-ai/docs/generative-ai/text/text-overview), the [Embeddings for Text API](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings), [BigQuery Vector Search](https://python.langchain.com/docs/integrations/vectorstores/google_bigquery_vector_search) and [LangChain 🦜️🔗](https://python.langchain.com/en/latest/).

### Context

Large Language Models (LLMs) have improved quantitatively and qualitatively. They can learn new abilities without being directly trained on them. However, LLMs have constraints: they are unaware of events that occurred after training, and it is almost impossible to trace the sources of their responses. It is preferable for LLM-based systems to cite their sources and be grounded in facts.

One approach to addressing these constraints is to augment the prompt sent to the LLM with relevant data retrieved from an external knowledge base through an Information Retrieval (IR) mechanism.

This approach is called Retrieval Augmented Generation (RAG), also known as Generative QA in the context of the QA task. There are two main components in RAG based architecture: (1) Retriever and (2) Generator.

## Getting Started

### Install Vertex AI SDK, other packages and their dependencies

Install the following packages required to execute this notebook.

In [None]:
# Install tiktoken, LangChain, the LangChain Vertex AI integration and the BigQuery client.
# --upgrade pulls the newest available releases; --quiet suppresses pip's progress output.
!pip install --upgrade --quiet tiktoken langchain langchain_google_vertexai google-cloud-bigquery

#### Restart current runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which will restart the current kernel.

In [None]:
# Automatically restart kernel after installs so that your environment can access the new packages
import IPython

# Fetch the running IPython application and ask its kernel to shut down;
# the True argument requests a restart rather than a plain shutdown.
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ Before proceeding, please wait for the kernel to finish restarting ⚠️</b>
</div>

### Authenticating your notebook environment
If you are using Colab, you will need to authenticate yourself first. The next cell will check if you are currently using Colab, and will start the authentication process.

If you are using Vertex AI Workbench, you will not require additional authentication.

For more information, you can check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env).

In [None]:
import sys

# Only run the interactive authentication flow when the Colab runtime module is loaded;
# in other environments this cell does nothing.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

- If you are running this notebook in a local development environment:
  - Install the [Google Cloud SDK](https://cloud.google.com/sdk).
  - Obtain authentication credentials. Create local credentials by running the following command and following the oauth2 flow (read more about the command [here](https://cloud.google.com/sdk/gcloud/reference/beta/auth/application-default/login)):

    ```bash
    gcloud auth application-default login
    ```

# Set Up

In [None]:
# @title Project { display-mode: "form" }
# Replace the placeholder with your own Google Cloud project ID before running.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Set the project id as the active gcloud project ({PROJECT_ID} is interpolated by IPython)
! gcloud config set project {PROJECT_ID}

In [None]:
# @title Region { display-mode: "form" }
# Passed as `location` when the BigQueryVectorSearch store is created later in this notebook.
REGION = "US"  # @param {type: "string"}

# Add documents to BigQueryVectorSearch

This step ingests and parses PDF documents, splits them, generates embeddings and adds the embeddings to the vector store. The document corpus used as the dataset is a collection of car owner's manuals.

**Summary steps**
- Create text embeddings: LangChain VertexAIEmbeddings
- Ingest PDF files: LangChain GCSDirectoryLoader
- Chunk documents: LangChain TextSplitter
- Create Vector Store: LangChain BigQueryVectorSearch

### Create the VertexAI Embedding model

In [None]:
from langchain.vectorstores.utils import DistanceStrategy
from langchain_community.vectorstores import BigQueryVectorSearch

In [None]:
from langchain_google_vertexai import VertexAIEmbeddings

# Vertex AI text embedding model; it is later handed to BigQueryVectorSearch as its `embedding`.
embedding = VertexAIEmbeddings(
    model_name="textembedding-gecko@latest", project=PROJECT_ID
)

### Ingest PDF file

The document is hosted in a Cloud Storage bucket (at `gs://github-repo/generative-ai/sample-apps/fixmycar/cymbal-starlight-2024.pdf`) and LangChain provides a convenient document loader [`GCSDirectoryLoader`](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/google_cloud_storage_directory.html) to load documents from a Cloud Storage bucket. The loader uses the `Unstructured` package to load files of many types including PDFs, images, HTML and more.

In [None]:
# Dependencies required by Unstructured PDF loader
! sudo apt -y -qq install tesseract-ocr libtesseract-dev
! sudo apt-get -y -qq install poppler-utils
! pip install --user --quiet unstructured pdf2image==1.16.3 pytesseract==0.3.10 pdfminer.six==20221105 unstructured

In [None]:
from langchain.document_loaders import GCSDirectoryLoader

Make a Google Cloud Storage bucket in your GCP project to copy the document files into.

In [None]:
# Bucket name is derived from the project ID.
GCS_BUCKET_DOCS = f"{PROJECT_ID}-fixmycar"
# `set -x` echoes the command before it runs; the bucket is created under PROJECT_ID in us-central1.
# NOTE(review): re-running this cell after the bucket exists presumably reports an error — confirm.
! set -x && gsutil mb -p $PROJECT_ID -l us-central1 gs://$GCS_BUCKET_DOCS

Copy document files to your bucket

In [None]:
# Destination folder inside the bucket; the same value is used as the loader prefix when ingesting.
folder_prefix = "manual/"
# -r recurses into subfolders; -x excludes every path matching the regex. The negative lookbehind
# makes the pattern match any path that does NOT end in ".pdf", so only PDF files are copied.
!gsutil rsync -r -x ".*(?<!\.pdf)$" gs://github-repo/generative-ai/sample-apps/fixmycar/ gs://$GCS_BUCKET_DOCS/$folder_prefix

In [None]:
# Ingest PDF files

print(f"Processing documents from {GCS_BUCKET_DOCS}")
loader = GCSDirectoryLoader(
    project_name=PROJECT_ID, bucket=GCS_BUCKET_DOCS, prefix=folder_prefix
)
documents = loader.load()

# Add document name and source to the metadata
for document in documents:
    # The loader reports a "/"-separated source that ends in the file name. Split it once at the
    # last "/" so that `source` is the containing folder and `document_name` is the file name.
    # The previous slicing assumed GCS_BUCKET_DOCS was a gs:// URI; with a bare bucket name it
    # dropped the scheme and the folder, leaving only "<bucket>/".
    loaded_source = document.metadata["source"]
    source, _, document_name = loaded_source.rpartition("/")
    document.metadata = {"source": source, "document_name": document_name}

print(f"# of documents loaded (pre-chunking) = {len(documents)}")

Verify document metadata

In [None]:
# Display the rewritten metadata ("source" and "document_name") of the first loaded document.
documents[0].metadata

## Chunk documents - TextSplitter

Split the documents into smaller chunks. When splitting the documents, ensure that a few chunks can fit within the context length of the LLM.

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Build the splitter (chunk_size=1000, chunk_overlap=50) with the separators listed from
# coarsest (blank line) down to the empty string.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
    chunk_overlap=50,
    chunk_size=1000,
)
doc_splits = text_splitter.split_documents(documents)

# Record each chunk's position in the overall sequence under the "chunk" metadata key.
for chunk_number, chunk in enumerate(doc_splits):
    chunk.metadata["chunk"] = chunk_number

print(f"# of documents = {len(doc_splits)}")

In [None]:
# Display the first chunk's metadata, which now also carries its "chunk" index.
doc_splits[0].metadata

## Configure BigQueryVectorSearch as Vector Store

In [None]:
# Replace the placeholders with the BigQuery dataset and table names that are passed to
# BigQueryVectorSearch as `dataset_name` and `table_name`.
DATASET = "[my-dataset]"  # @param {type: "string"}
TABLE = "[my-table]"  # @param {type: "string"}

In [None]:
# Vector store over the configured project, dataset and table, located in REGION and
# using the Vertex AI embedding model created earlier.
bq_vector_cars_manual = BigQueryVectorSearch(
    project_id=PROJECT_ID,
    dataset_name=DATASET,
    table_name=TABLE,
    location=REGION,
    embedding=embedding,
)

In [None]:
# Add the chunked documents to the vector store.
bq_vector_cars_manual.add_documents(doc_splits)

Verify the BigQueryVectorSearch with similarity search

In [None]:
# Sanity-check the store by running a similarity search for a sample question.
bq_vector_cars_manual.similarity_search(
    "What should I do when call the emergency roadside assistance?"
)

# Retrieval based Question/Answering Chain

We will demonstrate using four LangChain retrieval Q&A chains:

- `RetrievalQA`
- `RetrievalQAWithSourcesChain`
- `ConversationalRetrievalChain`
- Advanced: customized Q&A prompt and format

We begin by initializing a Vertex AI LLM and a LangChain 'retriever' to fetch documents from our BigQuery Vector Search.

For Q&A chains our retriever is passed directly to the chain and can be used automatically without any further configuration.

Behind the scenes, first the search query is passed to the retriever which runs a search and returns relevant document chunks. These chunks are then passed to the prompt used by the LLM to be used as context.

In [None]:
from langchain_google_vertexai import VertexAI
# NOTE(review): GoogleVertexAISearchRetriever is not referenced in the cells visible here —
# confirm whether this import is still needed.
from langchain.retrievers import GoogleVertexAISearchRetriever

# Text generation model shared by the Q&A chains below.
llm = VertexAI(model_name="text-bison@001")

# Retriever with default settings over the BigQuery vector store.
retriever = bq_vector_cars_manual.as_retriever()

### [`RetrievalQA` chain](https://python.langchain.com/docs/modules/chains/popular/vector_db_qa)

This is the simplest document Q&A chain offered by LangChain.

There are several different chain types available, listed [here](https://docs.langchain.com/docs/components/chains/index_related_chains).

- In these examples we use the `stuff` type, which simply inserts all of the document chunks into the prompt.
- This has the advantage of only making a single LLM call, which is faster and more cost efficient
- However, if we have a large number of search results we run the risk of exceeding the token limit in our prompt, or truncating useful information.
- Other chain types such as `map_reduce` and `refine` use an iterative process which makes multiple LLM calls, taking individual document chunks at a time and refining the answer iteratively.


In [None]:
from langchain.chains import RetrievalQA

search_query = "Why my tire warning light flashes"  # @param {type:"string"}

# "stuff" chain type: the retrieved chunks are placed together into a single prompt.
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever
)
# The bare query string is passed as the chain input; the returned value is the cell output.
retrieval_qa.invoke(search_query)

#### Inspecting the process

If we add `return_source_documents=True` we can inspect the document chunks that were returned by the retriever.

This is helpful for debugging, as these chunks may not always be relevant to the answer, or their relevance might not be obvious.

In [None]:
# Rebuild the chain so that the retrieved chunks are returned alongside the answer.
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

results = retrieval_qa.invoke({"query": search_query})

# Answer framed by rules of asterisks, followed by each retrieved chunk under a dashed rule.
star_rule = "*" * 79
dash_rule = "-" * 79
print(star_rule)
print(results["result"])
print(star_rule)
for source_doc in results["source_documents"]:
    print(dash_rule)
    print(source_doc.page_content)

### `RetrievalQAWithSourcesChain`

This variant returns an answer to the question alongside the source documents that were used to generate the answer.

In [None]:
from langchain.chains import RetrievalQAWithSourcesChain

retrieval_qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever
)

# Call through .invoke(), as the RetrievalQA cells do, instead of the deprecated chain __call__.
# return_only_outputs=True leaves the input question out of the returned dict.
retrieval_qa_with_sources.invoke({"question": search_query}, return_only_outputs=True)

### [`ConversationalRetrievalChain`](https://python.langchain.com/docs/modules/chains/popular/chat_vector_db)

`ConversationalRetrievalChain` remembers and uses previous questions so you can have a chat-like discovery process.

To use this chain we must provide a memory class to store and pass the previous messages to the LLM as context. Here we use the `ConversationBufferMemory` class that comes with LangChain.


In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Memory keeps earlier turns under the "chat_history" key (as message objects) so that
# follow-up questions are answered with the conversation as context.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
conversational_retrieval = ConversationalRetrievalChain.from_llm(
    llm=llm, retriever=retriever, memory=memory
)

search_query = "I’ve driven 1500 miles since my last oil change. How many miles can I drive until I need service?"

# Call through .invoke() rather than the deprecated chain __call__.
result = conversational_retrieval.invoke({"question": search_query})
print(result["answer"])

In [None]:
# Follow-up question; the chain's memory supplies the earlier turn as context.
new_query = "What about costs and expenses for the oil change?"
# Call through .invoke() rather than the deprecated chain __call__.
result = conversational_retrieval.invoke({"question": new_query})
print(result["answer"])

In [None]:
# Another follow-up question in the same conversation.
new_query = "Do you have any car care tips?"
# Call through .invoke() rather than the deprecated chain __call__.
result = conversational_retrieval.invoke({"question": new_query})
print(result["answer"])

## Advanced: Modifying the default langchain prompt

In all of the previous examples we used the default prompt that comes with langchain.

We can inspect our chain object to discover the wording of the prompt template being used.

We may find that this is not suitable for our purposes, and we may wish to customise the prompt, for example to present our results in a different format, or to specify additional constraints.

In [None]:
# Build a RetrievalQA chain with the default prompt so that the prompt can be inspected.
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True
)

# Print the prompt template held by the chain's combine-documents step.
print(qa.combine_documents_chain.llm_chain.prompt.template)

---

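Let's modify the prompt so that the LLM answers strictly from the retrieved context. We will constrain the LLM to say "I cannot determine the answer to that." if the answer cannot be determined from the context, and "I do not know the answer to that." if the context is empty.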

We create a new prompt_template and pass this in using the `template` argument.

In [None]:
from langchain.prompts import PromptTemplate

# Custom Q&A prompt. It has two placeholders: {context} (the retrieved chunks) and {question},
# which appears both before and after the context. The text inside the triple quotes is sent to
# the LLM verbatim, so every character of it affects behavior.
template = """SYSTEM: You are an intelligent assistant helping the users with their questions on their car manual.

Question: {question}

Strictly Use ONLY the following pieces of context to answer the question at the end. Think step-by-step and then answer.

Do not try to make up an answer:
 - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that."
 - If the context is empty, just say "I do not know the answer to that."

=============
{context}
=============

Question: {question}
Helpful Answer:"""

# Prompt object built from the template above with its two input variables.
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

We can also customize the retriever

In [None]:
# Retriever tuning values: how many chunks to fetch and the distance threshold to request.
NUMBER_OF_RESULTS = 10
SEARCH_DISTANCE_THRESHOLD = 0.6

# Build a similarity-search retriever over the BigQuery vector store with those settings.
# NOTE(review): "search_distance" is forwarded as a search keyword argument; confirm that
# BigQueryVectorSearch actually applies it rather than silently ignoring it.
retriever = bq_vector_cars_manual.as_retriever(
    search_type="similarity",
    search_kwargs={
        "k": NUMBER_OF_RESULTS,
        "search_distance": SEARCH_DISTANCE_THRESHOLD,
    },
)

In [None]:
# RetrievalQA chain that combines the customized retriever with the custom prompt template.
# The prompt is supplied to the "stuff" chain through chain_type_kwargs; verbose=True turns on
# the chain's own logging.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    verbose=True,
    chain_type_kwargs={
        "prompt": PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        ),
    },
)

In [None]:
# Confirm that the chain now holds the custom prompt template.
print(qa.combine_documents_chain.llm_chain.prompt.template)

In [None]:
# Enable for troubleshooting: switch on verbose output at each nested level of the chain —
# the combine-documents chain, its LLM chain, and the LLM object itself.
qa.combine_documents_chain.verbose = True
qa.combine_documents_chain.llm_chain.verbose = True
qa.combine_documents_chain.llm_chain.llm.verbose = True

import textwrap


def formatter(result):
    """Print a RetrievalQA result dict as a human-readable report.

    Args:
        result: Mapping with a "query" and a "result" entry and, optionally, a
            "source_documents" entry holding objects that expose `.metadata`
            (a mapping) and `.page_content` (text).

    Returns:
        None. The report is written to standard output.
    """
    dotted_rule = "." * 80
    dashed_rule = "-" * 80
    # Metadata keys that are shown when present, paired with their labels, in print order.
    labelled_fields = (
        ("score", "Matching Score"),
        ("source", "Document Source"),
        ("document_name", "Document Name"),
    )

    print(f"Query: {result['query']}")
    print(dotted_rule)
    if "source_documents" in result:
        for position, reference in enumerate(result["source_documents"]):
            print(dashed_rule)
            print(f"REFERENCE #{position}")
            print(dashed_rule)
            for key, label in labelled_fields:
                if key in reference.metadata:
                    print(f"{label}: {reference.metadata[key]}")
            print(dotted_rule)
            print(f"Content: \n{wrap(reference.page_content)}")
    print(dotted_rule)
    print(f"Response: {wrap(result['result'])}")
    print(dotted_rule)


def wrap(s):
    """Re-flow the text `s` to a target width of 120 columns, joined with newlines.

    Words longer than the width are kept whole instead of being split.
    """
    return textwrap.fill(s, width=120, break_long_words=False)


def ask(query, qa=qa, k=NUMBER_OF_RESULTS, search_distance=SEARCH_DISTANCE_THRESHOLD):
    """Run `query` through a RetrievalQA chain and print the formatted result.

    Args:
        query: The question to ask.
        qa: Chain to run; defaults to the `qa` chain that exists when this function is defined.
        k: Value written to the retriever's "k" search setting before the call.
        search_distance: Value written to the retriever's "search_distance" search setting.

    Returns:
        Whatever `formatter` returns (it prints the report and returns None).

    Note:
        The settings are written into `qa.retriever.search_kwargs`, so they stay in effect
        on that retriever after this call.
    """
    qa.retriever.search_kwargs["search_distance"] = search_distance
    qa.retriever.search_kwargs["k"] = k
    # Use .invoke(), as the earlier RetrievalQA cells do, instead of the deprecated chain __call__.
    result = qa.invoke({"query": query})
    return formatter(result)

In [None]:
# Ask a sample question; the formatted query, references and response are printed.
ask("What is the capacity of my fuel tank?")