# RAG application built on gemini 

In [1]:
#!pip install "unstructured[pdf]"

#### From URLs loading data

In [2]:
#from langchain_community.document_loaders import UnstructuredURLLoader
#urls = ['https://www.geeksforgeeks.org/what-is-retrieval-augmented-generation-rag/']
#loader = UnstructuredURLLoader(urls=urls)
#data = loader.load()

#### By Uploading pdfs

In [3]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("yolov9_paper.pdf")
data = loader.load()

In [4]:
data

[Document(metadata={'source': 'https://www.geeksforgeeks.org/what-is-retrieval-augmented-generation-rag/'}, page_content='')]

In [5]:
len(data)

1

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
docs = text_splitter.split_documents(data)
print("Total number of documents: ",len(docs))

Total number of documents:  0


In [8]:
docs[1]

In [9]:
# Embedding models: https://python.langchain.com/v0.1/docs/integrations/text_embedding/

In [10]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
load_dotenv() 

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector = embeddings.embed_query("hello, world!")
vector[:5]
#vector

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734449505806,
 0.01813092641532421]

In [11]:
vectorstore = Chroma.from_documents(documents=docs, embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"))

ValueError: Expected IDs to be a non-empty list, got 0 IDs

In [None]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 10})

retrieved_docs = retriever.invoke("What is new in yolov9?")


In [None]:
len(retrieved_docs)

In [None]:
print(retrieved_docs[5].page_content)

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0.3, max_tokens=500)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [None]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
response = rag_chain.invoke({"input": "what is new in YOLOv9?"})
print(response["answer"])