In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.chains.llm import LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.memory import ConversationBufferMemory
from dotenv import load_dotenv
import os
load_dotenv()

True

In [2]:
# Fail fast with a readable message when the key is missing. os.getenv()
# returns None for an unset variable, and assigning None into os.environ
# raises an opaque "TypeError: str expected, not NoneType".
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [3]:
# Load the PDFs found under docs/ into a list of LangChain documents.
# The search output further down shows each document carrying `source` and
# `page` metadata.
loader = PyPDFDirectoryLoader("docs/")
docs = loader.load()

In [4]:
# Split the loaded documents into overlapping chunks before embedding them.
# chunk_size=1000 / chunk_overlap=200 are measured with the splitter's default
# length function (presumably characters — confirm against the LangChain docs).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs)

In [6]:
# Directory (relative to the working directory) where the Chroma index is stored.
persist_directory = 'db'
# OpenAI embeddings are used for now; the plan is to swap in local embeddings later.
embedding = OpenAIEmbeddings()

In [None]:
# Build the Chroma index only when no persisted copy exists yet.
# Calling Chroma.from_documents() against a persist_directory that already
# holds the collection appends the same chunks again (duplicate search hits)
# and re-embeds the whole corpus through the paid OpenAI API on every run.
# Delete the persist directory to force a rebuild after changing the PDFs.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectordb = Chroma(persist_directory=persist_directory,
                      embedding_function=embedding)
else:
    vectordb = Chroma.from_documents(documents=texts,
                                     embedding=embedding,
                                     persist_directory=persist_directory)


In [None]:
# Persist the DB to disk, then drop the in-memory reference so that the
# following cell reloads the index from `persist_directory`.
vectordb.persist()
vectordb = None

In [7]:
# Load the persisted database from disk and use it as normal.
# The same `embedding` object used when building the index is passed in here.
vectordb = Chroma(persist_directory=persist_directory, 
                  embedding_function=embedding)

Test the vector DB with a query

In [8]:
# Smoke-test retrieval with a plain similarity search against the index.
# NOTE(review): the query text contains typos ("waht"; "metrology" is presumably
# meant to be "meteorology", which is what the later answers discuss). It is
# left unchanged so the recorded output stays valid — confirm and fix.
vectordb.similarity_search("waht is metrology?")

[Document(page_content='metres above ground level.\nThis exemption does not apply to costeaning and bulk sampling activities.\nNative vegetation that is to be removed, destroyed or lopped to the minimum extent\nnecessary by, or on behalf of, a licenced surveyor (within the meaning of section 3 of\ntheSurveying Act 2004 ) using hand-held tools to establish a sightline for the\nmeasurement of land.Surveying\nNative vegetation that is to be removed, destroyed or lopped by a person acting under,\nand in accordance with:Traditional\nowners\nPage690of1122NILLUMBIK PLANNING SCHEME', metadata={'source': 'docs\\Nillumbik PS All Ordinance 2.pdf', 'page': 689}),
 Document(page_content='Ifnodevelopmentofthelothasbeenapprovedunderthisscheme,containabuildingenvelope\nandbeabletocontainarectanglemeasuring10metresby15metres,or9metresby15metres\nifaboundarywallisnominatedaspartofthebuildingenvelope.\nIflotsofbetween300squaremetresand500squaremetresareproposedtocontaindwellingsthat\narebuilttotheboundar

Construct conversational retrieval chain

Requirements:
- chat model
- chat memory
- return sources



In [9]:
# Template that turns the chat history plus a follow-up into a standalone
# question. Placeholders: {chat_history}, {question}.
CONDENSE_PROMPT = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

# Template for the answering step. Placeholders: {summaries} and {question};
# these match the input_variables declared for QA_PROMPT_TEMPLATE below.
QA_PROMPT = """You will act as an AI Assistant that I am having a conversation with. You have the expertise of an expert physics professor that specialises in advanced mathematical physics and dynamical mechanics. You will provide answers and guidance from your extensive knowledge, and will always provide relevant theorems when requested. Additionally, you will ask follow-up questions to clarify the last response and provide more accurate and personalized answers. If the answer is not included in your knowledge base, you will say 'Hmm, I am not sure.' and stop after that. Your goal is to provide the best possible guidance and support to help me with my queries and problems. You think things through step by step every time and show your working. You break down problems into simple steps before solving and always double check your answers. 

{summaries}

Question: {question}
Helpful answer in markdown or latex where equations are included:"""

# Two chat models at temperature 0: gpt-3.5-turbo rewrites follow-up questions,
# gpt-4 writes the final answer.
gpt3 = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
gpt4 = ChatOpenAI(model_name="gpt-4", temperature=0)

# Wrap the raw template strings in PromptTemplate objects.
CONDENSE_QUESTION_PROMPT = PromptTemplate(
    template=CONDENSE_PROMPT,
    input_variables=["chat_history", "question"],
)
QA_PROMPT_TEMPLATE = PromptTemplate(
    template=QA_PROMPT,
    input_variables=["summaries", "question"],
)

# Step 1 of the chain: condense history + follow-up into a standalone question.
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=gpt3)

# Step 2 of the chain: answer from the retrieved chunks using the custom QA prompt.
doc_chain = load_qa_with_sources_chain(
    gpt4,
    prompt=QA_PROMPT_TEMPLATE,
    chain_type="stuff",
)

# Assemble retrieval + question rewriting + answering; also return the source documents.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
)

# First turn of a conversation: the history starts empty.
chat_history = []
query = "What are important equationas in dyanmical metrology??"
result = chain({"question": query, "chat_history": chat_history})
result['answer']

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=60).


In [15]:
# Show the whole response dict from the previous call, not just the answer text.
result

{'question': 'What are important equationas in dyanmical metrology??',
 'chat_history': [],
 'answer': 'In dynamical meteorology, some of the important equations are:\n\n1. **Mass Continuity Equation**: This equation describes the conservation of mass in a fluid system. It is given by:\n\n   $$\\frac{\\partial \\rho}{\\partial t} + \\nabla \\cdot (\\rho \\mathbf{u}) = 0$$\n\n   where $\\rho$ is the density of the fluid, $t$ is time, and $\\mathbf{u}$ is the velocity vector.\n\n2. **Momentum Equation**: This equation describes the conservation of momentum in a fluid system. In a rotating frame of reference, it is given by:\n\n   $$\\rho \\frac{D\\mathbf{u}}{Dt} = -\\nabla p + \\rho \\mathbf{g} - 2\\rho \\mathbf{u} \\times \\mathbf{\\Omega} + \\rho \\mathbf{F}_{friction}$$\n\n   where $D/Dt$ is the material derivative, $p$ is pressure, $\\mathbf{g}$ is the gravitational acceleration vector, $\\mathbf{\\Omega}$ is the angular velocity vector of the Earth, and $\\mathbf{F}_{friction}$ is t

1

In [None]:

# Template that turns the chat history plus a follow-up into a standalone
# question. Placeholders: {chat_history}, {question}.
CONDENSE_PROMPT = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

# Template for the answering step. It uses {context} (not {summaries}) because
# it is passed to the plain "stuff" QA chain that from_llm() builds.
QA_PROMPT = """You will act as an AI Assistant that I am having a conversation with. You have the expertise of an expert physics professor that specialises in advanced mathematical physics and dynamical mechanics. You will provide answers and guidance from your extensive knowledge, and will always provide relevant theorems when requested. Additionally, you will ask follow-up questions to clarify the last response and provide more accurate and personalized answers. If the answer is not included in your knowledge base, you will say 'Hmm, I am not sure.' and stop after that. Your goal is to provide the best possible guidance and support to help me with my queries and problems. You think things through step by step every time and show your working. You break down problems into simple steps before solving and always double check your answers. 

{context}

Question: {question}
Helpful answer in markdown or latex where equations are included:"""

model = ChatOpenAI(temperature=0.9, model_name='gpt-4')

# `memory` was referenced without ever being defined (NameError).
# - memory_key="chat_history" matches the input the chain expects.
# - return_messages=True hands the chain message objects rather than one string.
# - output_key="answer" is needed because the chain also returns
#   `source_documents`; without it the memory cannot tell which output to store.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer",
)

# The two prompts above were previously defined but never passed to the chain
# (only sketched in a commented-out config block); wire them in explicitly.
chain = ConversationalRetrievalChain.from_llm(
    model,
    vectordb.as_retriever(),
    condense_question_prompt=PromptTemplate.from_template(CONDENSE_PROMPT),
    combine_docs_chain_kwargs={"prompt": PromptTemplate.from_template(QA_PROMPT)},
    memory=memory,
    return_source_documents=True,
)

# With memory attached, only the question is supplied; history comes from `memory`.
query = "What is coriolis force?"
result = chain({"question": query})
result['answer']

2

In [None]:
# Condense-question template (same text as in the earlier cells), redefined here.
# Placeholders: {chat_history}, {question}.
CONDENSE_PROMPT = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

In [None]:
# Cell-local imports, gathered in one place.
from langchain import PromptTemplate
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.llm import LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

# Prompt object that rewrites a follow-up into a standalone question.
CONDENSE_QUESTION_PROMPT = PromptTemplate(
    template=CONDENSE_PROMPT,
    input_variables=["chat_history", "question"],
)

# Separate models for the two steps: gpt-3.5-turbo generates the standalone
# question, gpt-4 combines the retrieved documents into an answer.
# Neither model is configured for streaming in this cell.
gpt3 = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
gpt4 = ChatOpenAI(model_name="gpt-4", temperature=0)

question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=gpt3)
# No custom prompt is passed here, so the sources chain uses its default prompt.
doc_chain = load_qa_with_sources_chain(gpt4, chain_type="stuff")

chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=vectordb.as_retriever(),
)

# First turn of a conversation: the history starts empty.
chat_history = []
query = "What is coriolis force?"
result = chain({"question": query, "chat_history": chat_history})
result['answer']

3

In [None]:
# from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
# QA_PROMPT

In [None]:
# Template that turns the chat history plus a follow-up into a standalone
# question. Placeholders: {chat_history}, {question}.
CONDENSE_PROMPT = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

# Template for the answering step. Placeholders: {summaries}, {question}.
QA_PROMPT = """You will act as an AI Assistant that I am having a conversation with. You have the expertise of an expert physics professor that specialises in advanced mathematical physics and dynamical mechanics. You will provide answers and guidance from your extensive knowledge, and will always provide relevant theorems when requested. Additionally, you will ask follow-up questions to clarify the last response and provide more accurate and personalized answers. If the answer is not included in your knowledge base, you will say 'Hmm, I am not sure.' and stop after that. Your goal is to provide the best possible guidance and support to help me with my queries and problems. You think things through step by step every time and show your working. You break down problems into simple steps before solving and always double check your answers. 

{summaries}

Question: {question}
Helpful answer in markdown or latex where equations are included:"""

In [None]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

# Two chat models at temperature 0: gpt-3.5-turbo rewrites follow-up questions,
# gpt-4 writes the final answer.
gpt3 = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
gpt4 = ChatOpenAI(model_name="gpt-4", temperature=0)

# Wrap the raw template strings in PromptTemplate objects.
CONDENSE_QUESTION_PROMPT = PromptTemplate(
    template=CONDENSE_PROMPT,
    input_variables=["chat_history", "question"],
)
QA_PROMPT_TEMPLATE = PromptTemplate(
    template=QA_PROMPT,
    input_variables=["summaries", "question"],
)

# Question rewriting step, then the answering step with the custom QA prompt.
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=gpt3)
doc_chain = load_qa_with_sources_chain(
    gpt4,
    prompt=QA_PROMPT_TEMPLATE,
    chain_type="stuff",
)

# Assemble retrieval + question rewriting + answering; also return the source documents.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
)

# First turn of a conversation: the history starts empty.
chat_history = []
query = "What are important equationas in dyanmical metrology??"
result = chain({"question": query, "chat_history": chat_history})
result['answer']

In [None]:
# Example: using a custom *chat* prompt for the question-generation step of a
# ConversationalRetrievalChain.

from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain

# 1. Create the chat prompt from a system message and a human message.
#    The chain invokes the question generator with `chat_history` and
#    `question`, so the templates must use exactly those variables. The earlier
#    "{variables}" placeholder text would raise a missing-input-key error as
#    soon as a follow-up question is asked with a non-empty history.
system_prompt_template = (
    "Given the following conversation and a follow up question, rephrase the "
    "follow up question to be a standalone question."
)
system_message_prompt = SystemMessagePromptTemplate.from_template(system_prompt_template)

human_prompt_template = """Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
human_message_prompt = HumanMessagePromptTemplate.from_template(human_prompt_template)

chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])


# 2. Create a ChatOpenAI instance and pair it with the custom chat prompt.
chat_model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
question_generator = LLMChain(llm=chat_model, prompt=chat_prompt)


# 3. Plug the question generator into the ConversationalRetrievalChain.
doc_chain = load_qa_chain(chat_model, chain_type="map_reduce")

chain = ConversationalRetrievalChain(
    # Was `vectorstore`, a name not defined anywhere in the visible notebook
    # (NameError); the Chroma index created earlier is called `vectordb`.
    retriever=vectordb.as_retriever(),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
)

In [1]:
# Call chat_bot_response from the pdf_chat_bot module (not shown here) with an
# empty chat history.
# NOTE(review): the query contains typos ("equationas", "dyanmical"; "metrology"
# is presumably "meteorology") — left unchanged, confirm and fix.
from pdf_chat_bot import chat_bot_response
chat_history = []
query = "What are important equationas in dyanmical metrology??"
chat_bot_response(query, chat_history)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=60).
