In [None]:
# Copyright 2023 Nils Knieling
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Building AI-powered applications using LangChain and Google GenAI App Builder (Google Enterprise Search)

[![Open in Colab](https://img.shields.io/badge/Open%20in%20Colab-%23F9AB00.svg?logo=googlecolab&logoColor=white)](https://colab.research.google.com/github/Cyclenerd/toolbox/blob/master/notebooks/GenAI_App_Builder_Question_Answering.ipynb)
[![Open in Vertex AI Workbench](https://img.shields.io/badge/Open%20in%20Vertex%20AI%20Workbench-%234285F4.svg?logo=googlecloud&logoColor=white)](https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/Cyclenerd/toolbox/master/notebooks/GenAI_App_Builder_Question_Answering.ipynb)
[![View on GitHub](https://img.shields.io/badge/View%20on%20GitHub-181717.svg?logo=github&logoColor=white)](https://github.com/Cyclenerd/toolbox/blob/master/notebooks/GenAI_App_Builder_Question_Answering.ipynb)

Portions of this notebook are modifications based on work created and shared by [Google](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gen-app-builder/retrieval-augmented-generation/examples/question_answering.ipynb).

## Install required packages

>⚠️ You may receive a warning to "Restart Runtime" after the packages are installed. Don't worry, the subsequent cells will help you restart the runtime.

In [None]:
#@markdown ### Install dependencies

!pip install langchain==0.0.244
!pip install -U google-cloud-aiplatform==1.28.1 "shapely < 2.0.0"
!pip install google-cloud-storage==2.8.0
!pip install google-cloud-discoveryengine==0.9.1
!pip install pydantic==1.10.8
!pip install typing-inspect==0.8.0
!pip install typing_extensions==4.5.0

print("☑️ Done")

In [None]:
#@markdown ### Restart

# Automatically restart kernel after installs so that your environment
# can access the new packages.
import IPython

# Grab the running IPython application and ask its kernel to shut down;
# `True` is passed as the only argument to do_shutdown (the restart request
# described in the comment above).
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

print("☑️ Done")

## Setup Google Cloud environment

In [None]:
# @markdown ✏️ Replace the placeholder text below:

# Please fill in these values.
# These four names are read by later cells (gcloud calls, the retriever,
# aiplatform.init and the VertexAI LLM).
project_id = "test-nils-ai"  # @param {type:"string"}
region = "us-central1"  # @param {type:"string"}
search_engine_id = "test-nils-man"  # @param {type:"string"}
llm_model = "text-bison@001" # @param {type:"string"}

# Quick input validations.
# Each assert fails on an empty value with a message naming the missing field.
assert project_id, "⚠️ Please provide a Google Cloud project ID"
assert region, "⚠️ Please provide a Google Cloud region"
assert search_engine_id, "⚠️ Please provide a GenAI App Builder search engine ID"
assert llm_model, "⚠️ Please provide a pretrained LLM"

# Configure gcloud.
# `{project_id}` is interpolated by IPython from the Python variable above.
!gcloud config set project "{project_id}"
!gcloud config set storage/parallel_composite_upload_enabled "True"

print("☑️ Done")

In [1]:
#@markdown ### (Colab only!) Authenticate your Google Cloud Account

# NOTE(review): `os` is imported but not used within this cell.
import os
import sys

# Only run the Colab authentication flow when the `google.colab` module has
# already been loaded into this interpreter; otherwise the cell is a no-op.
if "google.colab" in sys.modules:
    from google.colab import auth as google_auth

    google_auth.authenticate_user()

In [None]:
#@markdown ###  Check authenticated user
current_user = !gcloud auth list \
  --filter="status:ACTIVE" \
  --format="value(account)" \
  --quiet

current_user = current_user[0]
print(f"Current user: {current_user}")

In [None]:
#@markdown ### Enable APIs

# Enable APIs
# Service names passed one by one to `gcloud services enable` below.
my_google_apis = [
    "storage.googleapis.com",
    "aiplatform.googleapis.com",
    "discoveryengine.googleapis.com",
]

# One gcloud invocation per service; `{api}` and `{project_id}` are
# interpolated by IPython from the Python variables.
for api in my_google_apis :
  print(f"Enable API: {api}")
  !gcloud services enable "{api}" \
    --project="{project_id}" \
    --quiet

print("☑️ OK")

## LangChain & Vertex AI

In [None]:
#@markdown #### Import and print versions

import sys
print(f"☑️ Python: {sys.version}")

# Langchain
import langchain
from langchain.llms import VertexAI
from langchain.retrievers import GoogleCloudEnterpriseSearchRetriever

# Enterprise Search
# Retriever used by every chain below; it is configured from the project and
# search engine IDs entered in the setup cell.
retriever = GoogleCloudEnterpriseSearchRetriever(
    project_id=project_id,
    search_engine_id=search_engine_id
)

print(f"☑️ LangChain version: {langchain.__version__}")

# Vertex AI
# https://python.langchain.com/docs/integrations/llms/google_vertex_ai_palm
# (VertexAI itself is already imported in the LangChain section above, so it
# is not imported a second time here.)
from google.cloud import aiplatform, aiplatform_v1beta1

# Initialise the Vertex AI SDK with the project and region from the setup cell.
aiplatform.init(
    project=project_id,
    location=region,
)

# LLM model
# Shared LLM instance used by every chain below; the model name comes from
# the setup cell, the generation parameters are fixed here.
llm = VertexAI(
    model_name=llm_model,
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)

print(f"☑️ Vertex AI SDK version: {aiplatform.__version__}")

## Query

In [None]:
# @markdown Enter search query in a simple English text.
# The question below is reused by all of the chain examples that follow.
user_query = "What is the default shell and the command to show the uptime?"  # @param {type:"string"}

# Quick input validations.
# Reject an empty query before any chain is invoked.
assert user_query, "⚠️ Please input a valid input search text"

### [RetrievalQA chain](https://python.langchain.com/docs/modules/chains/popular/vector_db_qa)

This is the simplest document Q&A chain offered by langchain.

There are several different chain types available, listed [here](https://docs.langchain.com/docs/components/chains/index_related_chains).
* In these examples we use the `stuff` type, which simply inserts all of the document chunks into the prompt.
* This has the advantage of only making a single LLM call, which is faster and more cost efficient.
* However, if we have a large number of search results we run the risk of exceeding the token limit in our prompt, or truncating useful information.
* Other chain types such as `map_reduce` and `refine` use an iterative process which makes multiple LLM calls, taking individual document chunks at a time and refining the answer iteratively.

In [16]:
from langchain.chains import RetrievalQA

# Build a "stuff"-type question-answering chain from the shared LLM and
# retriever created in the earlier cells.
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

# Last expression of the cell, so the returned answer is shown as its output.
retrieval_qa.run(user_query)

'The uptime command can be used to see the uptime.'

#### Inspecting the process

If we add `return_source_documents=True` we can inspect the document chunks that were returned by the retriever.

This is helpful for debugging, as these chunks may not always be relevant to the answer, or their relevance might not be obvious.

In [19]:
# Rebuild the chain so that it also returns the retrieved document chunks.
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True
)

# Calling the chain with a dict returns a dict; the keys read below are
# "source_documents" and "result".
results = retrieval_qa({"query": user_query})

# Show every retrieved chunk, separated by a dashed rule.
print("\nContext:")
for doc in results["source_documents"]:
    print("-" * 79)
    print(doc.page_content)

# Show the generated answer framed by rules so it stands out from the chunks.
print("\nResult:")
print("=" * 79)
print(results["result"])
print("=" * 79)


Contex:
-------------------------------------------------------------------------------
uptime(1) — Linux manual pages

Linux manual pages	 Section 1	  

Name

uptime — tell how long the system has been running

Synopsis

uptime [OPTION...] [FILE]

DESCRIPTION

Print the current time, the length of time the system has been up, the number of users on the system, and the average number of jobs in the run queue over the last 1, 5 and 15 minutes. Processes in an uninterruptible sleep state also contribute to the load average. If FILE is not specified, use /var/run/utmp. /var/log/wtmp as FILE is common.

−−help

display this help and exit

−−version

output version information and exit

AUTHOR

Written by Joseph Arceneaux, David MacKenzie, and Kaveh Ghazi.

REPORTING BUGS

GNU coreutils online help: <http://www.gnu.org/software/coreutils/>

Report uptime translation bugs to <http://translationproject.org/team/>

SEE ALSO

Full documentation at: <http://www.gnu.org/software/coreutils/uptime

### RetrievalQAWithSourcesChain

This variant returns an answer to the question alongside the source documents that were used to generate the answer.

In [20]:
from langchain.chains import RetrievalQAWithSourcesChain

# Same "stuff" setup as before, using the chain variant that reports sources.
retrieval_qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

# This chain takes its input under the "question" key. Being the last
# expression of the cell, the returned dict is displayed as the cell output.
retrieval_qa_with_sources(
    {"question": user_query},
    return_only_outputs=True,
)

{'answer': 'The default shell is /bin/bash.\n', 'sources': '1-pl'}

###  [ConversationalRetrievalChain](https://python.langchain.com/docs/modules/chains/popular/chat_vector_db)

`ConversationalRetrievalChain` remembers and uses previous questions so you can have a chat-like discovery process.

To use this chain we must provide a memory class to store and pass the previous messages to the LLM as context. Here we use the `ConversationBufferMemory` class that comes with langchain.


In [21]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Memory object handed to the chain so earlier turns of the conversation are
# available under the "chat_history" key.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

# Conversational chain built on the shared LLM and retriever.
conversational_retrieval = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

# First turn: ask the original query and print only the answer text.
result = conversational_retrieval({"question": user_query})
print(result["answer"])

The default shell is /bin/sh and the command to show the uptime is uptime.


In [22]:
# Follow-up turn sent through the same conversational chain (and therefore
# the same memory object) as the previous cell.
new_query = "What about other shells?"
result = conversational_retrieval({"question": new_query})
print(result["answer"])



In [23]:
# Another follow-up turn on the same conversational chain.
new_query = "Is there a shell called Bash?"

result = conversational_retrieval({"question": new_query})
print(result["answer"])

Yes, Bash is a shell. It is a Bourne-again shell, which is a free software shell and the default shell for many Linux distributions.


### Advanced: Modifying the default langchain prompt
In all of the previous examples we used the default prompt that comes with langchain.

We can inspect our chain object to discover the wording of the prompt template being used.

We may find that this is not suitable for our purposes, and we may wish to customise the prompt, for example to present our results in a different format, or to specify additional constraints.

In [24]:
# Build a default "stuff" chain purely so its prompt can be inspected.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

# Print the text of the prompt template the chain was built with.
print(qa.combine_documents_chain.llm_chain.prompt.template)

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:


Let's modify the prompt to return an answer in a single word (useful for yes/no questions). We will constrain the LLM to say 'I don't know' if it cannot answer.

We create a new `prompt_template`, wrap it in a `PromptTemplate`, and pass it to the chain using the `prompt` argument.

In [25]:
from langchain.prompts import PromptTemplate

# Custom instructions: answer from the context only, in a single word, and
# fall back to "I don't know". {context} and {question} are the placeholders
# declared as input variables below.
prompt_template = """Use the context to answer the question at the end.
You must always use the context and context only to answer the question. Never try to make up an answer. If the context is empty or you do not know the answer, just say "I don't know".
The answer should consist of only 1 word and not a sentence.

Context: {context}

Question: {question}
Helpful Answer:
"""

# Wrap the raw text in a PromptTemplate that names its two placeholders.
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# Build the chain with the custom prompt instead of the default one.
qa_chain = RetrievalQA.from_llm(
    llm=llm,
    prompt=prompt,
    retriever=retriever,
    return_source_documents=True,
)

In [26]:
# Print the prompt template attached to the customised chain to confirm that
# the new wording is the one in use.
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)

Use the context to answer the question at the end.
You must always use the context and context only to answer the question. Never try to make up an answer. If the context is empty or you do not know the answer, just say "I don't know".
The answer should consist of only 1 word and not a sentence.

Context: {context}

Question: {question}
Helpful Answer:



In [27]:
# Run the original query through the chain that uses the custom prompt.
results = qa_chain({"query": user_query})

# Show every retrieved chunk, separated by a dashed rule.
print("\nContext:")
for doc in results["source_documents"]:
    print("-" * 79)
    print(doc.page_content)

# Show the generated answer framed by rules so it stands out from the chunks.
print("\nResult:")
print("=" * 79)
print(results["result"])
print("=" * 79)


Contex:
-------------------------------------------------------------------------------
uptime(1) — Linux manual pages

Linux manual pages	 Section 1	  

Name

uptime — tell how long the system has been running

Synopsis

uptime [OPTION...] [FILE]

DESCRIPTION

Print the current time, the length of time the system has been up, the number of users on the system, and the average number of jobs in the run queue over the last 1, 5 and 15 minutes. Processes in an uninterruptible sleep state also contribute to the load average. If FILE is not specified, use /var/run/utmp. /var/log/wtmp as FILE is common.

−−help

display this help and exit

−−version

output version information and exit

AUTHOR

Written by Joseph Arceneaux, David MacKenzie, and Kaveh Ghazi.

REPORTING BUGS

GNU coreutils online help: <http://www.gnu.org/software/coreutils/>

Report uptime translation bugs to <http://translationproject.org/team/>

SEE ALSO

Full documentation at: <http://www.gnu.org/software/coreutils/uptime