# Project: Q & A on private documents (RAG)

In [23]:
%%capture
!pip install -r requirements.txt

In [2]:
import os
import openai
import getpass

In [3]:
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OPENAI_API_KEY: ")

Enter your OPENAI_API_KEY:  ········


In [4]:
os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your PINECONE_API_KEY: ")

Enter your PINECONE_API_KEY:  ········


## Loading Documents

In [5]:
# loading PDF, DOCX and TXT files as LangChain Documents
def load_documents(file):
    import os
    name, extension = os.path.splitext(file)

    if extension == '.pdf':
        from langchain.document_loaders import PyPDFLoader
        print(f'Loading {file}')
        loader = PyPDFLoader(file)
    elif extension == '.docx':
        from langchain.document_loaders import Docx2txtLoader
        print(f'Loading {file}')
        loader = Docx2txtLoader(file)
    elif extension == '.txt':
        from langchain.document_loaders import TextLoader
        loader = TextLoader(file)
    else:
        print('Document format is not supported!')
        return None

    data = loader.load()
    return data


# wikipedia
def load_from_wikipedia(query, lang='en', load_max_docs=2):
    from langchain.document_loaders import WikipediaLoader
    loader = WikipediaLoader(query=query, lang=lang, load_max_docs=load_max_docs)
    data = loader.load()
    return data

In [6]:
data = load_documents("files/us_constitution.pdf")

Loading files/us_constitution.pdf


In [16]:
data[1]

Document(metadata={'source': 'files/us_constitution.pdf', 'page': 1}, page_content='The House of Representatives shall be composed of Members chosen \n every second Y ear by the People of the several States, and the \n Electors in each State shall have the Qualifications requisite for \n Electors of the most numerous Branch of the State Legislature. \n No Person shall be a Representative who shall not have attained to the \n Age of twenty five Y ears, and been seven Y ears a Citizen of the United \n States, and who shall not, when elected, be an Inhabitant of that State \n in which he shall be chosen. \n Representatives and direct T axes shall be apportioned among the \n several States which may be included within this Union, according to \n their respective Numbers, which shall be determined by adding to the \n whole Number of free Persons, including those bound to Service for a \n T erm of Y ears, and excluding Indians not taxed, three fifths of all other \n Persons. The actual Enume

In [10]:
data[1].page_content

'The House of Representatives shall be composed of Members chosen\nevery second Year by the People of the several States, and the\nElectors in each State shall have the Qualifications requisite for\nElectors of the most numerous Branch of the State Legislature.\nNo Person shall be a Representative who shall not have attained to the\nAge of twenty five Years, and been seven Years a Citizen of the United\nStates, and who shall not, when elected, be an Inhabitant of that State\nin which he shall be chosen.\nRepresentatives and direct Taxes shall be apportioned among the\nseveral States which may be included within this Union, according to\ntheir respective Numbers, which shall be determined by adding to the\nwhole Number of free Persons, including those bound to Service for a\nTerm of Years, and excluding Indians not taxed, three fifths of all other\nPersons. The actual Enumeration shall be made within three Years\nafter the first Meeting of the Congress of the United States, and within\n

In [43]:
len(data[1].page_content)

1501

In [48]:
len(data[1].page_content.split())

249

In [11]:
data[1].metadata

{'source': 'files/us_constitution.pdf',
 'file_path': 'files/us_constitution.pdf',
 'page': 1,
 'total_pages': 41,
 'format': 'PDF 1.4',
 'title': 'us_constitution',
 'author': '',
 'subject': '',
 'keywords': '',
 'creator': '',
 'producer': 'Skia/PDF m115 Google Docs Renderer',
 'creationDate': '',
 'modDate': '',
 'trapped': ''}

In [17]:
len(data)

41

In [18]:
docs_data = load_documents("files/the_great_gatsby.docx")

Loading files/the_great_gatsby.docx


In [19]:
len(docs_data)

1

In [21]:
# docs_data[0].page_content

In [22]:
text_data  = load_documents("files/state_of_the_union.txt")

In [23]:
len(text_data)

1

In [25]:
# text_data[0].page_content

In [26]:
wiki_data = load_from_wikipedia("LLM Agents and GPT-4", load_max_docs=10)

In [27]:
len(wiki_data)

10

In [28]:
wiki_data[0].page_content

'Generative Pre-trained Transformer 4 (GPT-4) is a multimodal large language model created by OpenAI, and the fourth in its series of GPT foundation models. It was launched on March 14, 2023, and made publicly available via the paid chatbot product ChatGPT Plus, via OpenAI\'s API, and via the free chatbot Microsoft Copilot.  As a transformer-based model, GPT-4 uses a paradigm where pre-training using both public data and "data licensed from third-party providers" is used to predict the next token. After this step, the model was then fine-tuned with reinforcement learning feedback from humans and AI for human alignment and policy compliance.:\u200a2\u200a\nObservers reported that the iteration of ChatGPT using GPT-4 was an improvement on the previous iteration based on GPT-3.5, with the caveat that GPT-4 retains some of the problems with earlier revisions. GPT-4, equipped with vision capabilities (GPT-4V), is capable of taking images as input on ChatGPT. OpenAI has declined to reveal va

In [29]:
wiki_data[-1].page_content

'Generative artificial intelligence (generative AI, GenAI, or GAI) is a subset of artificial intelligence that uses generative models to produce text, images, videos, or other forms of data. These models often generate output in response to specific prompts. Generative AI systems learn the underlying patterns and structures of their training data, enabling them to create new data. \nImprovements in transformer-based deep neural networks, particularly large language models (LLMs), enabled an AI boom of generative AI systems in the early 2020s. These include chatbots such as ChatGPT, Copilot, Gemini and LLaMA, text-to-image artificial intelligence image generation systems such as Stable Diffusion, Midjourney and DALL-E, and text-to-video AI generators such as Sora. Companies such as OpenAI, Anthropic, Microsoft, Google, and Baidu as well as numerous smaller firms have developed generative AI models.\nGenerative AI has uses across a wide range of industries, including software development

## Chunking Data

In [7]:
def chunk_data(data, chunk_size=256):
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)
    chunks = text_splitter.split_documents(data)
    return chunks

In [8]:
chunks = chunk_data(data, chunk_size=512)
len(chunks)

112

In [34]:
chunks[0]

Document(metadata={'source': 'files/us_constitution.pdf', 'page': 0}, page_content='The United States Constitution \n W e the People of the United States, in Order to form a more perfect \n Union, establish Justice, insure domestic T ranquility , provide for the \n common defence, promote the general W elfare, and secure the \n Blessings of Liberty to ourselves and our Posterity , do ordain and \n establish this Constitution for the United States of America. \nThe Constitutional Con v ention \n Article I \n Section 1: Congress')

In [35]:
chunks[1]

Document(metadata={'source': 'files/us_constitution.pdf', 'page': 0}, page_content='All legislative Powers herein granted shall be vested in a Congress of \n the United States, which shall consist of a Senate and House of \n Representatives. \nSection 2: The House of Representatives')

In [36]:
chunks[-1]

Document(metadata={'source': 'files/us_constitution.pdf', 'page': 40}, page_content='26th Amendment \n Section 1 \n The right of citizens of the United States, who are eighteen years of \n age or older , to vote shall not be denied or abridged by the United \n States or by any State on account of age. \nSection 2 \n The Congress shall have power to enforce this article by appropriate \n legislation. \n27th Amendment \n No law , varying the compensation for the services of the Senators and \n Representatives, shall take ef fect, until an election of representatives \n shall have intervened.')

## Calculating the Cost

In [37]:
def print_embedding_cost(texts):
    import tiktoken
    enc = tiktoken.encoding_for_model('text-embedding-3-small')
    total_tokens = sum([len(enc.encode(page.page_content)) for page in texts])
    print(f'Total Tokens: {total_tokens}')
    print(f'Embedding Cost in USD: {total_tokens / 1000 * 0.00002:.6f}')

In [38]:
data_embedding_cost = print_embedding_cost(data)
data_embedding_cost

Total Tokens: 10032
Embedding Cost in USD: 0.000201


In [39]:
data_embedding_cost = print_embedding_cost(chunks)
data_embedding_cost

Total Tokens: 9927
Embedding Cost in USD: 0.000199


## Embedding and Uploading to a Vector Database (Pinecone)

In [55]:
def insert_or_fetch_embeddings(index_name, chunks):
    # importing the necessary libraries and initializing the Pinecone client
    import pinecone
    from langchain_community.vectorstores import Pinecone
    from langchain_openai import OpenAIEmbeddings
    from pinecone import PodSpec

    
    pc = pinecone.Pinecone()
        
    embeddings = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=1536)  # 512 works as well

    # loading from existing index
    if index_name in pc.list_indexes().names():
        print(f'Index {index_name} already exists. Loading embeddings ... ', end='')
        vector_store = Pinecone.from_existing_index(index_name, embeddings)
        print('Ok')
    else:
        # creating the index and embedding the chunks into the index 
        print(f'Creating index {index_name} and embeddings ...', end='')

        # creating a new index
        pc.create_index(
            name=index_name,
            dimension=1536,
            metric='cosine',
            spec=PodSpec(
                environment='gcp-starter'
            )
        )

        # processing the input documents, generating embeddings using the provided `OpenAIEmbeddings` instance,
        # inserting the embeddings into the index and returning a new Pinecone vector store object. 
        vector_store = Pinecone.from_documents(chunks, embeddings, index_name=index_name)
        print('Ok')
        
    return vector_store
    

In [56]:
def delete_pinecone_index(index_name='all'):
    import pinecone
    pc = pinecone.Pinecone()
    
    if index_name == 'all':
        indexes = pc.list_indexes().names()
        print('Deleting all indexes ... ')
        for index in indexes:
            pc.delete_index(index)
        print('Ok')
    else:
        print(f'Deleting index {index_name} ...', end='')
        pc.delete_index(index_name)
        print('Ok')

In [51]:
delete_pinecone_index()

Deleting all indexes ... 
Ok


In [57]:
index_name = 'askadocument'
vector_store = insert_or_fetch_embeddings(index_name=index_name, chunks=chunks)

Creating index askadocument and embeddings ...Ok


## Asking and Getting Answers

In [15]:
def ask_and_get_answer(vector_store, q, k=3):
    from langchain.chains import RetrievalQA
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(model='gpt-4o-mini', temperature=1)

    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': k})

    chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
    
    answer = chain.invoke(q)
    return answer

In [59]:
q = 'What is the Bill of Rights?'
answer = ask_and_get_answer(vector_store, q)
print(answer)

{'query': 'What is the Bill of Rights?', 'result': 'The Bill of Rights refers to the first ten amendments to the United States Constitution. These amendments were ratified in 1791 and serve to protect individual liberties and rights against government interference. The Bill of Rights includes important provisions such as freedom of speech, religion, and assembly (First Amendment), the right to bear arms (Second Amendment), protection against unreasonable searches and seizures (Fourth Amendment), and rights related to criminal prosecutions (Fifth through Eighth Amendments), among others.'}


### While loop for Asking questions

In [60]:
import time
i = 1
print('Write Quit or Exit to quit.')
while True:
    q = input(f'Question #{i}: ')
    i = i + 1
    if q.lower() in ['quit', 'exit']:
        print('Quitting ... bye bye!')
        time.sleep(2)
        break
    
    answer = ask_and_get_answer(vector_store, q)
    print(f'\nAnswer: {answer["result"]}')
    print(f'\n {"-" * 100} \n')

Write Quit or Exit to quit.


Question #1:  What is the first amendment described in the document?



Answer: The First Amendment states: "Congress shall make no law respecting an establishment of religion, or prohibiting the free exercise thereof; or abridging the freedom of speech, or of the press; or the right of the people peaceably to assemble, and to petition the Government for a redress of grievances."

 ---------------------------------------------------------------------------------------------------- 



Question #2:  what about the second amendment?



Answer: The Second Amendment states: "A well regulated Militia, being necessary to the security of a free State, the right of the people to keep and bear Arms, shall not be infringed." This amendment is often cited in discussions about gun rights and gun control in the United States.

 ---------------------------------------------------------------------------------------------------- 



Question #3:  Explain the election laws in the states as described in the document.



Answer: The document mentions several key aspects of election laws in the states, primarily focused on voting rights and representation:

1. **Due Process and Equal Protection**: States cannot deprive individuals of life, liberty, or property without due process of law. Additionally, they cannot deny any person within their jurisdiction equal protection under the laws. This establishes a legal foundation for ensuring fair treatment in the electoral process.

2. **Apportionment of Representatives**: Representatives in Congress are to be apportioned among the states based on their respective populations, counting the total number of persons in each state while excluding non-taxed Indians. This ensures that representation in Congress is reflective of the population distribution across states.

3. **Election of Senators**: The 17th Amendment specifies that Senators are to be elected directly by the people of each state, rather than appointed by state legislatures. Each state is represente

Question #4:  Bye



Answer: Goodbye! If you have any more questions in the future, feel free to ask.

 ---------------------------------------------------------------------------------------------------- 



Question #5:  Exit


Quitting ... bye bye!


### Ask Wikipidea

In [61]:
delete_pinecone_index()

Deleting all indexes ... 
Ok


In [62]:
data = load_from_wikipedia('Google Gemini', load_max_docs=100)
chunks = chunk_data(data)



  lis = BeautifulSoup(html).find_all('li')


In [63]:
len(chunks)

1991

In [64]:
chunks[:3]

[Document(metadata={'title': 'Gemini (chatbot)', 'summary': "Gemini, formerly known as Bard, is a generative artificial intelligence chatbot developed by Google. Based on the large language model (LLM) of the same name, it was launched in 2023 after being developed as a direct response to the rise of OpenAI's ChatGPT. It was previously based on PaLM, and initially the LaMDA family of large language models.\nLaMDA had been developed and announced in 2021, but it was not released to the public out of an abundance of caution. OpenAI's launch of ChatGPT in November 2022 and its subsequent popularity caught Google executives off-guard, prompting a sweeping response in the ensuing months. After mobilizing its workforce, the company launched Bard in a limited capacity in March 2023 before expanding to other countries in May. Bard took center stage during the 2023 Google I/O keynote in May and was upgraded to the Gemini LLM in December. In February 2024, Bard and Duet AI, another artificial in

In [65]:
index_name = 'gemini'
vector_store = insert_or_fetch_embeddings(index_name=index_name, chunks=chunks)

Creating index gemini and embeddings ...Ok


In [68]:
q = 'What is Google Gemini?'
answer = ask_and_get_answer(vector_store, q)
print(answer["result"])

Google Gemini is a large language model (LLM) developed by Google DeepMind, announced during the Google I/O keynote on May 10, 2023. It is touted as Google's "largest and most capable AI model" and is designed to emulate human behavior. Google intended to incorporate Gemini into various products, including Search, Ads, Chrome, Duet AI on Google Workspace, and AlphaCode 2. It serves as a more powerful successor to PaLM 2 and was made available only in English.


In [69]:
q = 'When was Google Gemini launched?'
answer = ask_and_get_answer(vector_store, q)
print(answer["result"])

Google Gemini was announced on May 10, 2023, and the launch date was targeted for late 2023.


In [70]:
q = 'What models (llms) are available with Google Gemini?'
answer = ask_and_get_answer(vector_store, q)
print(answer["result"])

The Google Gemini family includes several models: Gemini Ultra, Gemini Pro, Gemini Flash, and Gemini Nano.


In [71]:
delete_pinecone_index()

Deleting all indexes ... 
Ok


## Using Chroma as a Vector DB

- [Langchain - Chroma integration](https://docs.trychroma.com/integrations/langchain)

In [24]:
def create_embeddings_chroma(chunks, persist_directory="./chroma_db"):
    from langchain_chroma import Chroma
    from langchain_openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=1536)
    vectore_store = Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)
    return vectore_store

def load_embeddings_chroma(persist_directory="./chroma_db"):
    from langchain_chroma import Chroma
    from langchain_openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=1536)
    vectore_store = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
    return vectore_store

In [34]:
data = load_documents('files/rag_powered_by_google_search.pdf')

# Splitting the document into chunks
chunks = chunk_data(data, chunk_size=256)

# Creating a Chroma vector store using the provided text chunks and embedding model (default is text-embedding-3-small)
vector_store = create_embeddings_chroma(chunks)

Loading files/rag_powered_by_google_search.pdf


In [35]:
# Asking questions
q = 'What is Vertex AI Search?'
answer = ask_and_get_answer(vector_store, q)
answer

{'query': 'What is Vertex AI Search?',
 'result': "Vertex AI Search is a search feature within Google's Vertex AI platform that offers advanced capabilities including customizable answers, search tuning, vector search, grounding, and compliance updates tailored for enterprises. It integrates generative AI technologies to enhance the search experience and provide more relevant results."}

In [36]:
answer["result"]

"Vertex AI Search is a search feature within Google's Vertex AI platform that offers advanced capabilities including customizable answers, search tuning, vector search, grounding, and compliance updates tailored for enterprises. It integrates generative AI technologies to enhance the search experience and provide more relevant results."

In [42]:
# Load a Chroma vector store from the specified directory (default ./chroma_db) 
vectore_store = load_embeddings_chroma()
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
answer = ask_and_get_answer(vector_store, q, k=5)
answer["result"]

'The StackOverflow dataset had 8 million pairs of questions and answers.'

In [43]:
# We can't ask follow-up questions. There is no memory (chat history) available.
q = 'Multiply that number by 2.'
answer = ask_and_get_answer(vector_store, q)
print(answer['result'])

I don't know.


## Adding Memory (Chat History)

In [40]:
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

In [49]:
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
retriever = vectore_store.as_retriever(search_type="similarity", search_kwargs={"k": 15})
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

retrieval_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    chain_type="stuff",
    verbose=True
)

In [50]:
# create a function to ask questions
def ask_question(q, chain):
    result = chain.invoke({'question': q})
    return result

In [51]:
data = load_documents('files/rag_powered_by_google_search.pdf')
chunks = chunk_data(data, chunk_size=256)
vector_store = create_embeddings_chroma(chunks)

Loading files/rag_powered_by_google_search.pdf


In [52]:
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
result = ask_question(q, retrieval_chain)
result



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
simple similarity search was highly e ective because the dataset had 8
million pairs of questions and answers. However, datasets do not
usually contain pre-existing question-and-answer or query-and-

simple similarity search was highly e ective because the dataset had 8
million pairs of questions and answers. However, datasets do not
usually contain pre-existing question-and-answer or query-and-

simple similarity search was highly e ective because the dataset had 8
million pairs of questions and answers. However, datasets do not
usually contain pre-existing question-and-answer or query-and-

million pairs of questions and answers. However, datasets do not
usually contain

{'question': 'How many pairs of questions and answers had the StackOverflow dataset?',
 'chat_history': [HumanMessage(content='How many pairs of questions and answers had the StackOverflow dataset?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The StackOverflow dataset had 8 million pairs of questions and answers.', additional_kwargs={}, response_metadata={})],
 'answer': 'The StackOverflow dataset had 8 million pairs of questions and answers.'}

In [53]:
result["answer"]

'The StackOverflow dataset had 8 million pairs of questions and answers.'

In [54]:
q = 'Multiply that number by 10.'
result = ask_question(q, retrieval_chain)
result["answer"]



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: How many pairs of questions and answers had the StackOverflow dataset?
Assistant: The StackOverflow dataset had 8 million pairs of questions and answers.
Follow Up Input: Multiply that number by 10.
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
This substantial investment has helped to reduce costs and latency,
enabling us to bring production-grade semantic search experience to
billions of users.
Google has spent years i

'8 million multiplied by 10 is 80 million.'

In [55]:
q = 'Devide the result by 80.'
result = ask_question(q, retrieval_chain)
result["answer"]



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: How many pairs of questions and answers had the StackOverflow dataset?
Assistant: The StackOverflow dataset had 8 million pairs of questions and answers.
Human: Multiply that number by 10.
Assistant: 8 million multiplied by 10 is 80 million.
Follow Up Input: Devide the result by 80.
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
e ciency of an advanced RAG system.
Contact sales Get started for freeCloud Blog

di erent req

'80 million divided by 80 is 1 million.'

In [56]:
for item in result['chat_history']:
    print(item)

content='How many pairs of questions and answers had the StackOverflow dataset?' additional_kwargs={} response_metadata={}
content='The StackOverflow dataset had 8 million pairs of questions and answers.' additional_kwargs={} response_metadata={}
content='Multiply that number by 10.' additional_kwargs={} response_metadata={}
content='8 million multiplied by 10 is 80 million.' additional_kwargs={} response_metadata={}
content='Devide the result by 80.' additional_kwargs={} response_metadata={}
content='80 million divided by 80 is 1 million.' additional_kwargs={} response_metadata={}


### Loop for asking questions

In [58]:
while True:
    q = input('Your question: ')
    if q.lower() in 'exit quit bye':
        print('Bye bye!')
        break
    result = ask_question(q, retrieval_chain)
    print(result['answer'])
    print('-' * 100)

Your question:  Tell me about Google Search technologies as described in the document.




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: How many pairs of questions and answers had the StackOverflow dataset?
Assistant: The StackOverflow dataset had 8 million pairs of questions and answers.
Human: Multiply that number by 10.
Assistant: 8 million multiplied by 10 is 80 million.
Human: Devide the result by 80.
Assistant: 80 million divided by 80 is 1 million.
Human: Tell me about Google Search technologies as described in the document.
Assistant: The Google Search technologies described in the document include RankBrain, neural matching, ScaNN, and a family of TPUs (Tensor Processing Units).
Follow Up Input: Tell me about Google Search technologies as described in the document.
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...

Your question:  Is Vertex AI Search a fully-managed platform?




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: How many pairs of questions and answers had the StackOverflow dataset?
Assistant: The StackOverflow dataset had 8 million pairs of questions and answers.
Human: Multiply that number by 10.
Assistant: 8 million multiplied by 10 is 80 million.
Human: Devide the result by 80.
Assistant: 80 million divided by 80 is 1 million.
Human: Tell me about Google Search technologies as described in the document.
Assistant: The Google Search technologies described in the document include RankBrain, neural matching, ScaNN, and a family of TPUs (Tensor Processing Units).
Human: Tell me about Google Search technologies as described in the document.
Assistant: The Google Search technologies described in the document include RankBrain, neural matching, ScaNN, 

Your question:  Is Vertex AI Search a fully-managed platform?




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: How many pairs of questions and answers had the StackOverflow dataset?
Assistant: The StackOverflow dataset had 8 million pairs of questions and answers.
Human: Multiply that number by 10.
Assistant: 8 million multiplied by 10 is 80 million.
Human: Devide the result by 80.
Assistant: 80 million divided by 80 is 1 million.
Human: Tell me about Google Search technologies as described in the document.
Assistant: The Google Search technologies described in the document include RankBrain, neural matching, ScaNN, and a family of TPUs (Tensor Processing Units).
Human: Tell me about Google Search technologies as described in the document.
Assistant: The Google Search technologies described in the document include RankBrain, neural matching, ScaNN, 

Your question:  What advancements have been made in semantic search?




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: How many pairs of questions and answers had the StackOverflow dataset?
Assistant: The StackOverflow dataset had 8 million pairs of questions and answers.
Human: Multiply that number by 10.
Assistant: 8 million multiplied by 10 is 80 million.
Human: Devide the result by 80.
Assistant: 80 million divided by 80 is 1 million.
Human: Tell me about Google Search technologies as described in the document.
Assistant: The Google Search technologies described in the document include RankBrain, neural matching, ScaNN, and a family of TPUs (Tensor Processing Units).
Human: Tell me about Google Search technologies as described in the document.
Assistant: The Google Search technologies described in the document include RankBrain, neural matching, ScaNN, 

Your question:  bye


Bye bye!


## Using a Custom Prompt

In [59]:
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

llm = ChatOpenAI(model_name='gpt-4o', temperature=0)
retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 5})
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


system_template = r'''
Use the following pieces of context to answer the user's question.
Before answering translate your response to Jamaican Patois.
If you don't find the answer in the provided context, just respond "I don't know."
---------------
Context: ```{context}```
'''

user_template = '''
Question: ```{question}```
'''

messages= [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template(user_template)
]

qa_prompt = ChatPromptTemplate.from_messages(messages)

crc = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    chain_type='stuff',
    combine_docs_chain_kwargs={'prompt': qa_prompt },
    verbose=True
)

In [60]:
# db = load_embeddings_chroma()
q = 'How many pairs of questions and answers had the StackOverflow dataset?'
result = ask_question(q, crc)
result



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: 
Use the following pieces of context to answer the user's question.
Before answering translate your response to Jamaican Patois.
If you don't find the answer in the provided context, just respond "I don't know."
---------------
Context: ```simple similarity search was highly e ective because the dataset had 8
million pairs of questions and answers. However, datasets do not
usually contain pre-existing question-and-answer or query-and-

simple similarity search was highly e ective because the dataset had 8
million pairs of questions and answers. However, datasets do not
usually contain pre-existing question-and-answer or query-and-

simple similarity search was highly e ective because the dataset had 8
million pairs of questions and answers. However, datasets do not
usually contain pre-existing question-and-answer or query-and-

million pairs of 

{'question': 'How many pairs of questions and answers had the StackOverflow dataset?',
 'chat_history': [HumanMessage(content='How many pairs of questions and answers had the StackOverflow dataset?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Di StackOverflow dataset did have 8 million pairs of questions and answers.', additional_kwargs={}, response_metadata={})],
 'answer': 'Di StackOverflow dataset did have 8 million pairs of questions and answers.'}

In [62]:
result["answer"]

'Di StackOverflow dataset did have 8 million pairs of questions and answers.'

In [63]:
q = 'When was Elon Musk born?'
result = ask_question(q, crc)
result["answer"]



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: How many pairs of questions and answers had the StackOverflow dataset?
Assistant: Di StackOverflow dataset did have 8 million pairs of questions and answers.
Follow Up Input: When was Elon Musk born?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: 
Use the following pieces of context to answer the user's question.
Before answering translate your response to Jamaican Patois.
If you don't find the answer in the provided context, just respond "I don't know."
---------------
Context: ```house AI processors back in 2013 — the Tensor Processing Unit (TPU).
TPUs are speci cally tailored to provide the u

'Mi nuh know.'

In [64]:
q = 'When was Bill Gates born?'
result = ask_question(q, crc)
result["answer"]



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: How many pairs of questions and answers had the StackOverflow dataset?
Assistant: Di StackOverflow dataset did have 8 million pairs of questions and answers.
Human: When was Elon Musk born?
Assistant: Mi nuh know.
Follow Up Input: When was Bill Gates born?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: 
Use the following pieces of context to answer the user's question.
Before answering translate your response to Jamaican Patois.
If you don't find the answer in the provided context, just respond "I don't know."
---------------
Context: ```This substantial investment has helped to reduce costs and

'Mi nuh know.'