In [None]:
!pip -q install langchain langchain-community
!pip -q install pypdf
!pip install -q unstructured
!pip install langchain-google-genai
!pip -q install sentence_transformers
!pip install langchain-chroma
!pip install langchainhub

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Location of the source PDF (the space before ".pdf" is part of the file name).
pdf_path = "/content/CH2 Inverted Index and Preprocessing .pdf"

# Build the loader, then read the PDF into a list of documents.
loader = PyPDFLoader(pdf_path)
data = loader.load()

In [None]:
# Show how many documents the loader returned
# (presumably one per PDF page — TODO confirm against the PyPDFLoader docs).
len(data)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into smaller chunks (chunk_size=1000) so each
# piece can be embedded and retrieved on its own.
text_splitters = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitters.split_documents(data)

# Report how many chunks the splitter produced.
print("Total number of documents :", len(docs))

In [None]:
import getpass
import os

from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Credentials must never be written into the notebook. Reuse GOOGLE_API_KEY
# when the environment already provides it; otherwise ask for it interactively
# (getpass does not echo the typed value, so it cannot leak into cell output).
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")

# Embed every chunk in `docs` with the Google embedding model and index the
# vectors in a Chroma store.
# NOTE(review): no persist directory is passed, so the store is presumably
# not persisted between kernel sessions — confirm if persistence is expected.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)

In [None]:
# Expose the vector store as a similarity-search retriever that returns the
# 10 closest chunks for a query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

# Sanity-check retrieval with a sample question.
retrieved_docs = retriever.invoke("what is inverted index")

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Generation settings for the chat model, gathered in one place so they are
# easy to tweak.
llm_settings = {
    "model": "gemini-1.5-pro",
    "temperature": 0.3,
    "max_tokens": 500,
}

# Chat model used to generate the final answers.
llm = ChatGoogleGenerativeAI(**llm_settings)


In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the model. The `{context}` placeholder at the end is
# a template variable that is filled in when the prompt is rendered.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Two-message chat template: the system instructions followed by the user's
# question, supplied through the `{input}` variable.
prompt_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [None]:
# Chain that feeds the supplied documents and the question to the LLM
# through `prompt`.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: the retriever fetches documents, then the
# question-answering chain produces the answer from them.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [None]:
# Run the RAG pipeline on a sample question; the generated text is read from
# the "answer" key of the response.
response = rag_chain.invoke({"input": "what is inverted index"})
answer = response["answer"]
print(answer)