## Code 1: PDF question answering with local HuggingFace embeddings and a RetrievalQA chain

In [None]:
!pip install langchain -q
!pip install pypdf -q
!pip install sentence-transformers -q
!pip install chromadb -q
!pip install openai -q
!pip install langchain_community -q
!pip install langchain_google_genai -q
!pip install langchain_openai -q

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.memory import ConversationBufferMemory
from langchain_community.embeddings import HuggingFaceEmbeddings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Local import: RecursiveCharacterTextSplitter lives in the same module as the
# CharacterTextSplitter imported at the top of this notebook.
from langchain.text_splitter import RecursiveCharacterTextSplitter

PDF_PATH = "data.pdf"
CHUNK_SIZE = 768      # upper bound on characters per chunk
CHUNK_OVERLAP = 128   # characters repeated between neighbouring chunks

# Read the PDF into LangChain Document objects.
loader = PyPDFLoader(PDF_PATH)
data = loader.load()

# CharacterTextSplitter only cuts on a single separator (default "\n\n").
# Text extracted from PDFs rarely contains blank lines, so whole pages would
# come through as one oversized chunk. The recursive splitter falls back to
# "\n", " " and finally single characters, so CHUNK_SIZE is actually honoured.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
texts = text_splitter.split_documents(data)

# Fail here with a clear message instead of later inside the vector store
# (e.g. a scanned PDF with no text layer produces no chunks).
if not texts:
    raise ValueError(
        f"No text could be extracted from {PDF_PATH!r}; nothing to index."
    )

In [None]:
import os

PERSIST_DIR = "chroma_store"

# Local sentence-transformers model used to embed both chunks and queries.
embedding = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")

# `Chroma.from_documents` ADDS to whatever is already stored in PERSIST_DIR,
# so re-running this cell (or the notebook) would index every chunk again and
# the retriever would start returning duplicates. Reuse the store when it
# already exists; delete the PERSIST_DIR folder to force a rebuild after the
# PDF or the chunking settings change.
if os.path.isdir(PERSIST_DIR) and os.listdir(PERSIST_DIR):
    vectorstore = Chroma(
        persist_directory=PERSIST_DIR,
        embedding_function=embedding,
    )
else:
    vectorstore = Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=PERSIST_DIR,
    )

In [None]:
import os
from getpass import getpass

# Never store the key in the notebook: take it from the GOOGLE_API_KEY
# environment variable, or prompt for it (input is not echoed or saved).
# Create a key at https://aistudio.google.com/app/apikey
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")

# NOTE(review): confirm that the "gemini-pro" model id is still available for your account.
llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)

In [None]:
# "stuff" chain: the retrieved chunks are placed into a single prompt that is
# sent to the LLM together with the question.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)

# Interactive question loop; type "exit" to leave it.
while True:
    try:
        query = input('Question ("exit" to quit): ').strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or interrupted kernel: stop without a traceback.
        break
    if not query:
        # Do not spend an LLM call on a blank line.
        continue
    if query.lower() == "exit":
        break
    # `.invoke` replaces the deprecated direct call `qa({...})`.
    result = qa.invoke({"query": query})
    print("Gemini Pro:", result["result"])

## Code 2: PDF question answering with Google Generative AI embeddings and create_retrieval_chain

In [None]:
!pip install pypdf2
!pip install chromadb
!pip install google.generativeai
!pip install langchain-google-genai
!pip install langchain
!pip install langchain_community
!pip install jupyter

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Chroma

In [None]:
import os
from getpass import getpass

# Never store the key in the notebook: take it from the GOOGLE_API_KEY
# environment variable, or prompt for it (input is not echoed or saved).
# Create a key at https://aistudio.google.com/app/apikey
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")

# The chat model and the embedding model share the same key.
# NOTE(review): confirm that both model ids are still available for your account.
llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

In [None]:
# Sentence-oriented splitter: cut on "." and pack the pieces into chunks of up
# to 768 characters, with 128 characters of overlap between chunks.
text_splitter = CharacterTextSplitter(
    separator=".",
    chunk_size=768,
    chunk_overlap=128,
    length_function=len,
    is_separator_regex=False,
)

# Load the PDF, then split the loaded documents into chunks.
loader = PyPDFLoader("data.pdf")
pages = text_splitter.split_documents(loader.load())

In [None]:
# Embed every chunk and index it in a Chroma store (no persist directory given).
vectordb = Chroma.from_documents(
    documents=pages,
    embedding=embeddings,
)

In [None]:
# Retrieve the 10 closest chunks for each question.
search_options = {"k": 10}
retriever = vectordb.as_retriever(search_kwargs=search_options)

In [None]:
# Prompt with two placeholders: {context} receives the retrieved chunks,
# {input} receives the user's question.
template = """
You are a helpful AI assistant. Answer based on the context provided.
context: {context}
input: {input}
answer:
"""
prompt = PromptTemplate.from_template(template)

# First chain: fills the prompt with the documents and calls the LLM.
combine_docs_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Second chain: runs the retriever, then hands its documents to the first chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

In [None]:
# Interactive question loop; type "exit" to leave it.
while True:
    try:
        query = input('Question ("exit" to quit): ').strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or interrupted kernel: stop without a traceback.
        break
    if not query:
        # Do not spend a retrieval + LLM call on a blank line.
        continue
    if query.lower() == "exit":
        break
    response = retrieval_chain.invoke({"input": query})
    print("Gemini Pro:", response["answer"])