In [1]:
! pip install langchain_community tiktoken langchain-google-genai langchainhub chromadb langchain pypdf




[notice] A new release of pip is available: 24.0 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:

import functools
import hashlib
import os

import chromadb
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from rpm_limiter import RPMLimiter, call_with_retry

In [3]:
# Parse the source PDF into LangChain documents.
PDF_PATH = "R2vitamin.pdf"
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

# Quick check on how many documents the loader produced.
print(len(docs))

33


In [4]:
# Break the loaded documents into overlapping chunks for embedding.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.split_documents(docs)
print("Total chunks:", len(chunks))

# Show the first three chunks to eyeball where the splitter cut the text.
preview = chunks[:3]
for i, ch in enumerate(preview, start=1):
    print(f"\n==================== CHUNK {i} ====================", ch.page_content, sep="\n")

Total chunks: 107

Sethi et al, Int. J. Sci. Info. 2024, 1 (10), 23-55 
 
23 
 
INTERNATIONAL JOURNAL OF SCIENTIFIC INFORMATION  
www.jsiinternational.com                                   ISSN: 2583-8512 
                                         Review Article 
A Comprehensive Guide to Benefits and Production of Vitamins  
Neeraj Sethi1, Deepika Gulati2, Sushila Kaura 3* 
1Department of Biotechnology, OSGU, Hisar, Haryana 
2Department of Botany, Government College, Hisar  
3Department of Pharmacology, OSGU, Hisar, Haryana 
 
20neerajsethi@gmail.com  
deepikahau84@gmail.com 
sushilakaura@gmail.com 
 
*Corresponding Author: sushilakaura@gmail.com 
Article Received on: 26/12/23 Revised on: 3/1/24 Approved for publication: 10/1/24  
ABSTRACT   
Vitamins are essential organic compounds that play crucial roles in various physiological processes, ensuring the  
proper functioning and maintenance of the human body. These micronutrients are vital for growth, development,

proper functioning an

In [None]:
# Open (or create) the on-disk Chroma collection, embed the PDF chunks into it,
# and expose a retriever that returns the 2 best-matching chunks per query.
PERSIST_DIR ="./chroma_db"

os.makedirs(PERSIST_DIR, exist_ok=True)

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

client = chromadb.PersistentClient(path=PERSIST_DIR)

vectorstore = Chroma(
    client=client,
    collection_name="course_rag",
    embedding_function=embeddings,
)

# RPMLimiter / call_with_retry come from the local rpm_limiter module;
# presumably they throttle and retry the embedding API calls -- confirm there.
limiter = RPMLimiter(max_rpm=80)

batch_size = 60
for i in range(0, len(chunks), batch_size):
    batch = chunks[i:i + batch_size]
    # The collection lives on disk, so adding documents without explicit ids
    # appends a fresh copy of every chunk each time this cell is re-run, and the
    # retriever then returns duplicate chunks. Deterministic ids (chunk position
    # + content hash) make repeat runs overwrite the same entries instead.
    # Entries stored by earlier runs under random ids are not removed by this.
    batch_ids = [
        hashlib.sha256(f"{i + offset}:{doc.page_content}".encode("utf-8")).hexdigest()
        for offset, doc in enumerate(batch)
    ]
    # Bind ids with partial so call_with_retry is invoked exactly as before:
    # one callable, one positional argument, and the limiter keyword.
    add_batch = functools.partial(vectorstore.add_documents, ids=batch_ids)
    call_with_retry(add_batch, batch, limiter=limiter)

retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

In [10]:
# Prompt text for the question-answering step; {question} and {context}
# are filled in by the chain at invocation time.
RAG_TEMPLATE = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise.\n\n"
    "Question: {question}\n"
    "Context: {context}\n"
    "Answer:"
)
prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

# Chat model used to generate the answer, with temperature set to 0.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
)

def format_docs(docs):
    """Return the `page_content` of every item in `docs`, joined by blank lines."""
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

# Inputs for the prompt: the retrieved chunks flattened to text by format_docs,
# and the user's question passed through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Retrieval -> prompt -> model -> plain-string answer.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

question = "What are fat-soluble vitamins and what do they do?"
rag_chain.invoke(question)

"Fat-soluble vitamins include A, D, E, and K, which are absorbed with dietary fats and stored in the body's fatty tissues. They are vital for various functions such as vision, immune function, skin health, calcium absorption, bone health, and blood clotting. Vitamin E also acts as a powerful antioxidant, protecting cells from oxidative damage."