In [37]:
!pip install -qU langchain langchain_community langchain_chroma langchainhub pypdf langchain-ollama langchain-text-splitters

In [1]:
import os
# Working directory of the kernel; the PDF and the Chroma store are located
# relative to it.
HOME = os.getcwd()
# Parent directory of HOME.
ROOT = os.path.dirname(HOME)

# Echo both locations, one per line.
print(HOME, ROOT, sep="\n")

/Users/shubhamrathod/PycharmProjects/RAG_Pipeline/RAG_Chain
/Users/shubhamrathod/PycharmProjects/RAG_Pipeline


# Load the Document

In [2]:
from langchain_community.document_loaders import PyPDFLoader

# The PDF to index lives directly inside HOME.
file_path = f'{HOME}/CC.pdf'

# Build the loader, then read the file into Document objects.
loader = PyPDFLoader(file_path)
pages = loader.load()

In [3]:
# Number of documents returned by the loader.
len(pages)

5

# Split the Document

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [5]:
# Splitter configuration: chunks of up to 8000 units with a 3000-unit overlap
# between neighbouring chunks. In the recorded run the 5 loaded pages come out
# as 5 chunks, i.e. no page was large enough to be split further.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=8000,
    chunk_overlap=3000,
    length_function=len,       # sizes are measured with len(), i.e. in characters
    is_separator_regex=False,  # separators are treated as literal strings
)

In [6]:
# Split the loaded pages into the chunks that will be embedded and indexed.
docs = text_splitter.split_documents(pages)

In [7]:
# Number of chunks produced by the splitter.
len(docs)

5

# Embedding Model

In [8]:
from langchain_ollama import OllamaEmbeddings

# Embedding function handed to the Chroma index below.
# NOTE(review): presumably needs a running Ollama instance with this model
# already pulled; confirm before a fresh run.
embeddings = OllamaEmbeddings(model="mxbai-embed-large:latest")

# Create Index

In [9]:
from langchain_chroma import Chroma

In [10]:
# On-disk location of the Chroma store, next to the notebook.
persist_directory = f'{HOME}/chroma_db'

if not os.path.isdir(persist_directory):
    # No store yet: embed the chunks and persist them under persist_directory.
    index = Chroma.from_documents(
        persist_directory=persist_directory,
        documents=docs,
        embedding=embeddings,
    )
else:
    # A store directory already exists: open it instead of re-embedding.
    index = Chroma(
        embedding_function=embeddings,
        persist_directory=persist_directory,
    )

In [11]:
# Sanity-check the index with a direct similarity search.
# The hits get their own name so that `docs` (the split chunks that the
# index-building cell embeds) is not overwritten; otherwise re-running that
# cell after this one would index the search hits instead of the chunks.
emily_hits = index.similarity_search('Who is emily')
print(emily_hits)

[Document(metadata={'page': 2, 'source': '/Users/shubhamrathod/PycharmProjects/RAG_Pipeline/RAG_Chain/CC.pdf'}, page_content='Sample Call Center Script: Technical Support Hotline  \nEmily  \nGood afternoon. TBH Network Solutions.  \nFred  \nYeah, hi. My system is down and I need \nto speak with a technician.  \nEmily  \nOh, okay. Let me gather some \ninformation and see if we can help. What \nis your first name?  \nFred  \nFred.  \nEmily  \n And your last nam e; would you spell it \nfor me please? Fred Sure. It’s C -H-A-M-B-\nE-R-S, Chambers.  \nEmily  \nOkay. And your company name?  \nFred  \nI’m with GoldStar Environmental.  \nEmily  \nGoldStar Environmental?  \nFred  \nYes ma’am.  \nEmily  \nOkay. And your callback number?  \nFred  \n610-265-1715.  \nEmily  \nThat’s 610 -265-1715?  \nFred  \nYes.  \nEmily  \nOkay. And what seems to be the problem \ntoday?  Fred  \nMy agents aren’t able to make or receive \nany telephone calls.  \nEmily  \nOkay and what type of system do you \nhave. 

# Create Retriever

In [12]:
# Expose the vector store as a retriever (no search options passed) so it can
# be composed into the chain.
retriever = index.as_retriever()

# Prompt Template

In [13]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [14]:
# Prompt text for the RAG chain. It has two placeholders, {question} and
# {context}; the instructions ask the model to admit when it does not know the
# answer and to reply in at most three sentences.
template = """

    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

    Question: {question} 
    
    Context: {context} 
    
    Answer:

"""

In [15]:
# Turn the template string into a PromptTemplate that the chain can fill in.
custom_rag_prompt = PromptTemplate.from_template(template)

# Chain

In [17]:
def format_docs(pages):
    """Merge retrieved documents into one context string.

    Args:
        pages: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of every item, in order, separated by a blank
        line. An empty iterable yields an empty string.
    """
    contents = [page.page_content for page in pages]
    separator = "\n\n"
    return separator.join(contents)

In [18]:
from langchain_ollama import ChatOllama

# Chat model used to generate the answers.
llm = ChatOllama(
    model="mistral-nemo:latest",
    # temperature 0 asks for the least random sampling, keeping answers as
    # repeatable as the model allows.
    temperature=0,
)

In [19]:
# Inputs for the prompt: "context" is the retriever's hits formatted into one
# string, "question" is the user's query passed through unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: gather inputs -> fill the prompt -> call the model -> plain string.
rag_chain = rag_inputs | custom_rag_prompt | llm | StrOutputParser()

In [20]:
# Run the chain once; the string passed in is the question.
result = rag_chain.invoke("Which Bank did Tracy go to?")

In [21]:
# Show the answer returned by the chain.
print(result)

Tracy went to Bank of Wealth.


In [22]:
# Ask the chain for a specific detail (an account number) and print its answer.
query = "What is  Tracy Q. Randall account number?"
result = rag_chain.invoke(query)
print(result)

Tracy Q. Randall's account number is 805-7845-3895-061.


In [27]:
# Ask the chain about an order and print its answer.
query = "What did John Perez order from Pizza Loco?"
result = rag_chain.invoke(query)
print(result)

John Perez did not order from Pizza Loco.


In [25]:
# Ask the chain for another account number and print its answer.
query = "What is  Maegan Simpson account number?"
result = rag_chain.invoke(query)
print(result)

Maegan Simpson's account number is 5340036548.


In [26]:
# Ask the chain a procedural question and print its answer.
query = "What should Rohan Sharma do to switch sim card?"
result = rag_chain.invoke(query)
print(result)

To switch sim cards, Rohan Sharma should contact Reliance Infocomm to request a new sim card. He will need to provide a copy of his ration card or driving license and two photos of himself. An agent will visit him at his convenience (he mentioned 11 am tomorrow) to collect the documents and deliver the new sim card.
