### Loading PDF File

In [1]:
from langchain.document_loaders import PyMuPDFLoader

# Load the PDF into LangChain documents; the result is kept in PDF_Doc, which
# the chunking cell passes to splitter.split_documents().
try:
    loader = PyMuPDFLoader("./data/pdf/rag_intro.pdf")
    PDF_Doc = loader.load()
except Exception as e:
    print(f"Following error occured while loading PDF: \n {e}")
    # Re-raise instead of swallowing: if loading failed, PDF_Doc is never
    # assigned and the next cell would die with an unrelated NameError.
    raise

  from .autonotebook import tqdm as notebook_tqdm


### Chunking

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings gathered in one place so they are easy to tune.
splitter_settings = {
    "chunk_size": 200,
    "chunk_overlap": 20,
    "length_function": len,
    "separators": ["\n\n", "\n", " ", ""],
}
splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Break the loaded PDF documents into chunks and display how many were produced.
chunks = splitter.split_documents(PDF_Doc)
len(chunks)

16

### Embedding and storing Chunks in ChromaDB

In [3]:
# Alternative: OpenAIEmbeddings (disabled).
# SECURITY: an API key must never be pasted into the notebook. The key that was
# previously hard-coded here has been removed and should be treated as leaked
# and revoked; supply a fresh one through the OPENAI_API_KEY environment variable.
#
# import os
# from langchain.embeddings import OpenAIEmbeddings
#
# embedding = OpenAIEmbeddings(
#     openai_api_key=os.environ["OPENAI_API_KEY"],
#     model="text-embedding-3-small",
# )
#
# text = "Hi this is a test vector"
# vector = embedding.embed_query(text)

# Using HuggingFaceEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings

# `embeddings` is reused by the Chroma cell to embed every chunk.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Smoke test: embed one sample sentence and display the resulting vector.
vector = embeddings.embed_query("Hi this is a test vector")
vector



  embeddings = HuggingFaceEmbeddings(


[-0.01886286586523056,
 0.02682013250887394,
 -0.08314228802919388,
 -0.014210806228220463,
 0.04505985975265503,
 -0.050322651863098145,
 -0.01690148189663887,
 0.0019616447389125824,
 -0.01726575382053852,
 0.020077552646398544,
 0.05934203788638115,
 0.028926575556397438,
 -0.004097369033843279,
 0.01396571472287178,
 -0.07393265515565872,
 -0.034678295254707336,
 -0.04928213730454445,
 -0.019216952845454216,
 -0.0034388357307761908,
 0.039706360548734665,
 0.00384328979998827,
 0.029861100018024445,
 -0.06439108401536942,
 0.0007733660168014467,
 0.04584841430187225,
 0.08145888894796371,
 0.06610333919525146,
 0.01900629885494709,
 0.06997545808553696,
 -0.05009663477540016,
 0.07525406032800674,
 0.05042583867907524,
 -0.010513323359191418,
 0.07412178814411163,
 -0.08962195366621017,
 -0.023835407570004463,
 -0.00219648121856153,
 0.012949942611157894,
 -0.017673995345830917,
 0.06248042732477188,
 0.009992054663598537,
 -0.028175845742225647,
 0.026446232572197914,
 0.025584559

In [4]:
# Initialize ChromaDB and store the chunks
from langchain.vectorstores import Chroma

persist_directory = "./chroma_db"

# Deterministic ids make this cell safe to re-run: LangChain's Chroma wrapper
# writes by id, so the same chunk positions are overwritten instead of a second
# copy of every chunk being appended to the persisted collection.
# NOTE(review): entries written by earlier runs (with auto-generated ids) stay in
# ./chroma_db until that directory is cleared — confirm and clean up once.
chunk_ids = [f"chunk-{i}" for i in range(len(chunks))]

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory=persist_directory,
    collection_name="rag_collection"
)
# _collection is a private attribute of the wrapper; it is used here only to
# report how many vectors the collection currently holds.
print(f"Vectore store created with {vectorstore._collection.count()} vectors.")
print(f"Persisted at: {persist_directory}")


Vectore store created with 16 vectors.
Persisted at: ./chroma_db


In [5]:
# Retrieve the three stored chunks most similar to the question and display them.
question = "What is RAG?"
top_k = 3
similarity_search = vectorstore.similarity_search(question, k=top_k)
similarity_search

[Document(metadata={'creator': 'ReportLab PDF Library - www.reportlab.com', 'format': 'PDF 1.3', 'creationdate': '2025-09-24T15:52:51+00:00', 'source': './data/pdf/rag_intro.pdf', 'keywords': '', 'trapped': '', 'file_path': './data/pdf/rag_intro.pdf', 'total_pages': 10, 'author': 'anonymous', 'creationDate': "D:20250924155251+00'00'", 'page': 1, 'producer': 'ReportLab PDF Library - www.reportlab.com', 'modDate': "D:20250924155251+00'00'", 'moddate': '2025-09-24T15:52:51+00:00', 'subject': 'unspecified', 'title': 'untitled'}, page_content='What is RAG?\nRAG stands for Retrieval-Augmented Generation.\nIt is an AI technique that combines retrieval of external knowledge with generative models.'),
 Document(metadata={'total_pages': 10, 'author': 'anonymous', 'title': 'untitled', 'page': 9, 'trapped': '', 'modDate': "D:20250924155251+00'00'", 'subject': 'unspecified', 'creator': 'ReportLab PDF Library - www.reportlab.com', 'keywords': '', 'creationDate': "D:20250924155251+00'00'", 'source': 

In [6]:
# Same lookup as the previous cell, but each hit is returned as a (Document, score) pair.
# NOTE(review): the score is presumably a distance (lower = closer) — confirm
# against the Chroma documentation before interpreting it.
question = "What is RAG?"
top_k = 3
similarity_search = vectorstore.similarity_search_with_score(question, k=top_k)
similarity_search

[(Document(metadata={'source': './data/pdf/rag_intro.pdf', 'file_path': './data/pdf/rag_intro.pdf', 'format': 'PDF 1.3', 'title': 'untitled', 'creator': 'ReportLab PDF Library - www.reportlab.com', 'subject': 'unspecified', 'total_pages': 10, 'creationDate': "D:20250924155251+00'00'", 'modDate': "D:20250924155251+00'00'", 'keywords': '', 'page': 1, 'creationdate': '2025-09-24T15:52:51+00:00', 'producer': 'ReportLab PDF Library - www.reportlab.com', 'trapped': '', 'moddate': '2025-09-24T15:52:51+00:00', 'author': 'anonymous'}, page_content='What is RAG?\nRAG stands for Retrieval-Augmented Generation.\nIt is an AI technique that combines retrieval of external knowledge with generative models.'),
  0.35862988233566284),
 (Document(metadata={'total_pages': 10, 'page': 9, 'creator': 'ReportLab PDF Library - www.reportlab.com', 'modDate': "D:20250924155251+00'00'", 'subject': 'unspecified', 'title': 'untitled', 'trapped': '', 'producer': 'ReportLab PDF Library - www.reportlab.com', 'source':

### LLM Model Integration using HuggingFace

In [None]:
import getpass
import os

from langchain.chat_models import ChatOpenAI

# Never hard-code the token in the notebook. Read it from the HF_TOKEN
# environment variable and fall back to an interactive prompt, so the secret is
# never saved in the .ipynb file or committed to version control.
api_key = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face API token: ")

# The ChatOpenAI client is pointed at the Hugging Face router URL rather than
# the OpenAI API; `llm` is reused later by the RetrievalQA chain.
llm = ChatOpenAI(
    openai_api_key=api_key,
    base_url="https://router.huggingface.co/v1",
    model="meta-llama/Llama-3.1-8B-Instruct"
)

# Quick sanity check that the model answers before wiring it into the chain.
response = llm.invoke("What is LLM large language model? Kepp answer short and precise.")
print(response.content)



A Large Language Model (LLM) is a type of artificial intelligence (AI) that:

1. Processes and analyzes vast amounts of text data.
2. Generates human-like responses or text based on input.
3. Learns patterns, relationships, and context from the data.
4. Can perform tasks like language translation, summarization, and question-answering.

Examples of LLMs include:

- Chatbots
- Virtual assistants
- Language translation tools
- Text generators


### Defining the retriever

In [16]:
# Wrap the vector store as a retriever that returns the 5 most similar chunks.
# (The variable keeps its original spelling because the RAG chain cell refers to it.)
retriver = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)
# Alternative with the library defaults:
# retriever = vectorstore.as_retriever()

### Starting RAG chain

In [17]:
from langchain.chains import RetrievalQA

# Build a question-answering chain from the LLM and the retriever; the source
# documents are returned with the answer so they can be listed below.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriver,
    return_source_documents=True,
)

result = qa_chain.invoke("What are the benefits and use cases of RAG?")
answer = result["result"]
print("Answer:", answer)

# Show source documents and page numbers
# NOTE(review): the 'page' metadata may be zero-based — confirm before citing pages.
for position, doc in enumerate(result["source_documents"], start=1):
    page = doc.metadata.get('page', 'N/A')
    preview = doc.page_content[:100]
    print(f"Source {position}: page {page}, content: {preview}...")

Answer: The benefits of RAG (Retrieval-Augmented Generation) include:

1. **Access to external, up-to-date information**: RAG can retrieve information from external sources, providing the model with the latest knowledge and facts.
2. **Improved factual accuracy and reduced hallucination**: By retrieving and verifying information from external sources, RAG can reduce the likelihood of generating incorrect or made-up information (hallucinations).
3. **Context-aware and relevant responses**: RAG can generate responses that are tailored to the context and topic at hand, making them more relevant and useful to the user.

The use cases of RAG include:

1. **Virtual assistants**: RAG can be used in virtual assistants like Siri, Alexa, or Google Assistant to provide more accurate and relevant responses to user queries.
2. **Chatbots**: RAG can be used in chatbots to generate more informative and helpful responses to user inquiries.
3. **Question answering systems**: RAG can be used in question