In [11]:
import os
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_cohere import ChatCohere, CohereEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv

In [12]:

# Load settings from the local .env file. The Cohere key is read from the
# `cohere_api_key` variable used by this notebook, falling back to the
# upper-case `COHERE_API_KEY` name so either spelling works.
load_dotenv('.env')
cohere_api_key = os.environ.get("cohere_api_key") or os.environ.get("COHERE_API_KEY")

# Chat model shared by the question-rewriting and answering chains below.
llm = ChatCohere(cohere_api_key=cohere_api_key, temperature=0)

### Construct retriever ###
# Load the PDF without pre-splitting it: chunking happens exactly once, in the
# text splitter below, so chunk boundaries are governed by a single
# chunk_size / chunk_overlap setting.
# NOTE(review): extract_images=True presumably needs an OCR backend to be
# installed — confirm the environment provides it.
loader = PyPDFLoader("Light.pdf", extract_images=True)
pages = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(pages)

# Hand the embeddings client the same key as the chat model instead of
# relying on a separately named environment variable being present.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=CohereEmbeddings(cohere_api_key=cohere_api_key),
)
retriever = vectorstore.as_retriever()

In [13]:
### Contextualize question ###
# System instructions telling the model to rewrite the latest user question so
# it stands alone, without answering it.
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""

# Message order: system instructions, the prior turns, then the new user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Retriever wrapper built from the chat model, the base retriever and the
# question-rewriting prompt defined above.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [14]:

### Answer question ###
# System instructions for the answering step; `{context}` is the template
# variable that receives the retrieved documents.
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""

# Same message layout as the question-rewriting prompt: system text, history,
# then the user's input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

# Chain that feeds the documents plus the prompt to the chat model.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [15]:
### Statefully manage chat history ###
# In-memory registry mapping a session id to its message history.
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``store``.

    Returns:
        The history object stored under ``session_id``; a new empty
        ``ChatMessageHistory`` is registered when the id is not yet known.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session id is seen: register a fresh history.
        history = store[session_id] = ChatMessageHistory()
        return history


# Wrap the RAG chain with per-session message history. The key names tell the
# wrapper which field carries the user input ("input"), which prompt variable
# receives the history ("chat_history"), and which output field holds the
# reply ("answer").
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [16]:
# First question in session "abc123"; this session id becomes a key in `store`.
session_config = {"configurable": {"session_id": "abc123"}}
result = conversational_rag_chain.invoke(
    {"input": "What is Electric charge?"},
    config=session_config,
)
result["answer"]

'Electric charge refers to the property of certain particles, such as protons and electrons, that can exert a force on each other at a distance.'

In [17]:
# Follow-up in the same session ("abc123"), so the chain has the earlier turn
# available as chat history when interpreting "it".
session_config = {"configurable": {"session_id": "abc123"}}
result = conversational_rag_chain.invoke(
    {"input": "What are the basic properties of it?"},
    config=session_config,
)
result["answer"]

'The basic properties of electric charge are quantisation, additivity, and conservation. This includes the total charge of a body being a multiple of a basic charge unit.'

In [18]:
# Start a separate conversation under the session id "anurag".
session_config = {"configurable": {"session_id": "anurag"}}
result = conversational_rag_chain.invoke(
    {"input": "Hii, I am Anurag"},
    config=session_config,
)
result["answer"]

'Hi Anurag! How can I help?'

In [19]:
# Ask within the "anurag" session again to check that the earlier turn is
# carried over through the stored history.
session_config = {"configurable": {"session_id": "anurag"}}
result = conversational_rag_chain.invoke(
    {"input": "What is my name?"},
    config=session_config,
)
result["answer"]

'Your name is Anurag.'