In [1]:
!pip install -qU langchain langchain_community langchain_chroma langchainhub pypdf langchain-ollama langchain-text-splitters

In [13]:
import os

# Working directory of the kernel; later cells build file paths relative to it.
HOME = os.getcwd()
# Parent directory of the working directory.
ROOT = os.path.dirname(HOME)

print(HOME)
print(ROOT)

/Users/shubhamrathod/PycharmProjects/RAG_Pipeline/RAG_Chain
/Users/shubhamrathod/PycharmProjects/RAG_Pipeline


# Load the Document

In [14]:
from langchain_community.document_loaders import PyPDFLoader

# Location of the source PDF, expected in the notebook's working directory.
file_path = f'{HOME}/CC.pdf'

# Build the loader for that file, then read its contents into `pages`.
loader = PyPDFLoader(file_path)
pages = loader.load()

In [15]:
# Sanity check: how many items loader.load() returned.
len(pages)

5

# Split the Document

In [16]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [17]:
# Chunking parameters. Lengths are counted with the built-in len (see length_function).
# With these values the recorded run produced as many chunks as loaded pages (5 and 5).
CHUNK_SIZE = 8000
CHUNK_OVERLAP = 3000

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
    is_separator_regex=False,
)

In [18]:
# Run the configured splitter over the loaded pages; `docs` holds the resulting chunks
# that are later embedded and stored in the vector index.
docs = text_splitter.split_documents(pages)

In [19]:
# Sanity check: how many chunks the splitter produced.
len(docs)

5

# Embedding Model

In [20]:
from langchain_ollama import OllamaEmbeddings

# Name of the Ollama model used to embed text for the vector index.
embedding_model_name = "mxbai-embed-large:latest"
embeddings = OllamaEmbeddings(model=embedding_model_name)

# Create Index

In [21]:
from langchain_chroma import Chroma

In [22]:
persist_directory = f'{HOME}/chroma_db'

# Open the persistent store at persist_directory (created on first use).
index = Chroma(persist_directory = persist_directory, embedding_function = embeddings)

# The mere existence of the directory does not prove the index was populated:
# an interrupted or failed earlier run can leave it behind with no vectors in it.
# Check the stored records instead, and embed the chunks only when there are none.
if not index.get(limit = 1)["ids"]:
    index.add_documents(documents = docs)

In [23]:
# Smoke-test the index with a sample query.
# The hits get their own name so that `docs` keeps referring to the split chunks;
# rebinding `docs` here would make the index-creation cell embed search results
# instead of the document chunks if it were ever re-run.
sample_hits = index.similarity_search('Who is emily')
print(sample_hits)

[Document(metadata={'page': 1, 'source': '/Users/shubhamrathod/PycharmProjects/RAG_Pipeline/RAG_Chain/CC.pdf'}, page_content='CALL CENTER CONVERSERTION SCRIPT 2a  \n \nAgent:           Thank you for calling ABC Company. My name is Ashley. How may I  \nhelp you today?  \nCustomer:     I am calling because I received a wrong bill. I just paid my phone bill \ntwo days ago and my                    payment is not reflected in the bill.  \nAgent:            Sorry for the inconvenience madam. May I have your Account  \nNumber, please.  \nCustomer:     5340036548  \nAgent:            For verification purposes mam, Can I get your name and birthdate?, \nplease.  \nCustomer:     5340036548  \nAgent:            For verification purposes mam, Can I get your name and birthdate?  \nCustomer:     Maegan Simpson, July 23, 1974 and the account is under my name.  \nAgent:            Thank you for that information mam. Per our system’s data, you did \npay your last bill last Aug. 12 which was two days ag

# Create Retriever

In [24]:
# Wrap the vector index in a retriever (no search options passed, so library defaults apply);
# the conversational chain below uses it to fetch context for each question.
retriever = index.as_retriever()

In [25]:
# Alternative: limit retrieval to the 2 closest chunks:
# retriever = index.as_retriever(search_kwargs={"k": 2})

# Prompt Template

In [26]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [27]:
# Conversation buffer handed to the chain below. History is stored under the
# "chat_history" key (the same name as the {chat_history} placeholder in the prompt
# template) and, with return_messages=True, is kept as message objects
# (HumanMessage / AIMessage in the recorded outputs).
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [35]:
# Prompt text for the answer-generation step. It has three placeholders:
# {chat_history}, {context} and {question}.
template = """

    You are an assistant for question-answering tasks. Use the following pieces of retrieved context and chat history to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

    Chat History: {chat_history}
    
    Context: {context}

    Question: {question} 
    
    Answer:

"""

In [36]:
# Declare exactly the placeholders that appear in `template`:
# {chat_history}, {context} and {question}. The previous list named "history",
# which the template does not contain, and left out "chat_history" and "context".
prompt = PromptTemplate(
    input_variables=["chat_history", "context", "question"],
    template=template,
)

# LLM

In [37]:
from langchain_ollama import ChatOllama

# Chat model settings: the Ollama model tag and a sampling temperature of 0.
llm_settings = {
    "model": "llama3.1:latest",
    "temperature": 0,
}
llm = ChatOllama(**llm_settings)

# Run Chain

In [38]:
from langchain.chains import ConversationalRetrievalChain

In [39]:
# Options forwarded to the document-combining step: use the custom prompt built earlier.
combine_docs_settings = {"prompt": prompt}

# Assemble the conversational retrieval chain from the LLM, retriever and memory.
# The result is returned under the 'answer' key; return_source_documents is not passed.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    chain_type="stuff",
    combine_docs_chain_kwargs=combine_docs_settings,
    output_key='answer',
    verbose=False,
)

In [45]:
# Turn 1: opening question of the conversation.
bank_question = "Which Bank did Tracy go to?"
result = chain.invoke({'question': bank_question})
print(result)

{'question': 'Which Bank did Tracy go to?', 'chat_history': [HumanMessage(content='Which Bank did Tracy go to?'), AIMessage(content='Tracy Q. Randall went to the Bank of Wealth.')], 'answer': 'Tracy Q. Randall went to the Bank of Wealth.'}


In [46]:
# Turn 2: follow-up about the same customer.
account_question = "What is Tracys accout number?"
result = chain.invoke({'question': account_question})
print(result)

{'question': 'What is Tracys accout number?', 'chat_history': [HumanMessage(content='Which Bank did Tracy go to?'), AIMessage(content='Tracy Q. Randall went to the Bank of Wealth.'), HumanMessage(content='What is Tracys accout number?'), AIMessage(content="Tracy Q. Randall's account number is 805-7845-3895-061.")], 'answer': "Tracy Q. Randall's account number is 805-7845-3895-061."}


In [47]:
# Turn 3: the question only says "she", so the customer's name is not in the question itself.
balance_question = "How much balance does she have in her account?"
result = chain.invoke({'question': balance_question})
print(result)

{'question': 'How much balance does she have in her account?', 'chat_history': [HumanMessage(content='Which Bank did Tracy go to?'), AIMessage(content='Tracy Q. Randall went to the Bank of Wealth.'), HumanMessage(content='What is Tracys accout number?'), AIMessage(content="Tracy Q. Randall's account number is 805-7845-3895-061."), HumanMessage(content='How much balance does she have in her account?'), AIMessage(content='Tracy Q. Randall has a current balance of $84,065.00 in her Bank of Wealth account.')], 'answer': 'Tracy Q. Randall has a current balance of $84,065.00 in her Bank of Wealth account.'}


In [48]:
# Turn 4: asks about an earlier turn of this same conversation.
recall_question = "Whos account number was asked previously?"
result = chain.invoke({'question': recall_question})
print(result)

{'question': 'Whos account number was asked previously?', 'chat_history': [HumanMessage(content='Which Bank did Tracy go to?'), AIMessage(content='Tracy Q. Randall went to the Bank of Wealth.'), HumanMessage(content='What is Tracys accout number?'), AIMessage(content="Tracy Q. Randall's account number is 805-7845-3895-061."), HumanMessage(content='How much balance does she have in her account?'), AIMessage(content='Tracy Q. Randall has a current balance of $84,065.00 in her Bank of Wealth account.'), HumanMessage(content='Whos account number was asked previously?'), AIMessage(content="Tracy Q. Randall's account number, 805-7845-3895-061, was asked by the agent to verify the customer's identity and access their account information.")], 'answer': "Tracy Q. Randall's account number, 805-7845-3895-061, was asked by the agent to verify the customer's identity and access their account information."}


In [44]:
# Empty the conversation buffer so a new conversation starts without prior history.
# NOTE(review): this cell's execution count (44) is lower than those of the question
# cells above (45-48), so in the recorded session it ran before them, not after.
memory.clear()