In [None]:
%pip install langchain
%pip install pypdf
%pip install langchain-text-splitters
%pip install langchain_google_genai
%pip install langchain-chroma

In [22]:
import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment (by default from a local
# .env file) before any key is read.
load_dotenv()

# Each lookup yields None when the variable is not set, so a missing key only
# surfaces later, when the corresponding client is first used.
# Note the environment variable names differ from the Python names below.
HUGGING_FACE_API = os.environ.get('HUGGING_FACE_API')
PINECONE_API_KEY = os.environ.get('PINECONE_API')
GOOGLE_API_KEY = os.environ.get('GOOGLE_API')

In [2]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
# Read the PDFs under the relative `pdfs/` directory. Judging by the saved
# output further down, each returned Document carries `source` and `page`
# metadata.
loader = PyPDFDirectoryLoader(path="pdfs/")
data = loader.load()

In [35]:
# Import from the dedicated `langchain-text-splitters` package (installed in
# the first cell) instead of the legacy `langchain.text_splitter` path.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded pages into chunks of at most 500 characters (measured with
# `len`), with 20 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)

text_chunks = text_splitter.split_documents(data)


In [36]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

# Embedding client for the hosted Hugging Face Inference API, so no model
# weights are downloaded locally. `HUGGING_FACE_API` is None if the
# environment variable was not set.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGING_FACE_API,
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

In [59]:
from langchain_chroma import Chroma

import hashlib

# Derive a stable id for every chunk from its position, source file, page and
# text. Without explicit ids the store assigns fresh random ones on each call,
# so re-running this cell against the same persist_directory would append a
# second copy of every chunk and retrieval would return duplicates. With
# stable ids a re-run overwrites the existing entries instead.
# The position is part of the id so that two chunks with identical text on the
# same page still get distinct ids within one batch.
chunk_ids = [
    hashlib.sha256(
        "|".join(
            (
                str(position),
                str(chunk.metadata.get("source")),
                str(chunk.metadata.get("page")),
                chunk.page_content,
            )
        ).encode("utf-8")
    ).hexdigest()
    for position, chunk in enumerate(text_chunks)
]

# NOTE: copies written by earlier runs under random ids are not removed by
# this; delete ./chroma_db once to start from a clean index.
db = Chroma.from_documents(
    text_chunks,
    embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db",
)

In [65]:
query = "When was Bill Gates Born?"

# Retriever over the Chroma index; k=2 limits each lookup to two chunks.
retriever = db.as_retriever(search_kwargs={"k": 2})

# `invoke` is the Runnable entry point that supersedes the deprecated
# `get_relevant_documents`; it returns the same list of Documents.
# Show the text of the best match as the cell output.
retriever.invoke(query)[0].page_content

'books?id=tsl3EVBtdxcC& pg=PA228& dq=Congregationalist+ + bill+ gates& hl=en& ei=GjuBTcCQI8uB0QHJo-yECQ& sa=X&\noi=book_result& ct=result& resnum=10& ved=0CFwQ6AEwCQ#v=onepage& q& f=false). Columbia University Press. . Retrieved March\n10, 2011. "Bill Gates was a member of the baby boom, born in 1955 into an upper-middle-class family near Seattle." He attended the\nCongregational Church, participated in the Boy Scouts, and went to a fancy private school."\n[17](Manes 1994, p.\xa024)'

In [62]:
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model used to answer questions from the retrieved chunks.
gemini_llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)

# "stuff" puts all retrieved chunks into a single prompt; the source documents
# are returned with the answer so it can be checked against the PDF text.
chain = RetrievalQA.from_chain_type(
    llm=gemini_llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [66]:
# Run the QA chain through `invoke` (calling the chain object directly is
# deprecated). The result dict holds the query, the answer under 'result', and
# the retrieved 'source_documents'.
print(chain.invoke({"query": query}))

{'query': 'When was Bill Gates Born?', 'result': '1955', 'source_documents': [Document(page_content='books?id=tsl3EVBtdxcC& pg=PA228& dq=Congregationalist+ + bill+ gates& hl=en& ei=GjuBTcCQI8uB0QHJo-yECQ& sa=X&\noi=book_result& ct=result& resnum=10& ved=0CFwQ6AEwCQ#v=onepage& q& f=false). Columbia University Press. . Retrieved March\n10, 2011. "Bill Gates was a member of the baby boom, born in 1955 into an upper-middle-class family near Seattle." He attended the\nCongregational Church, participated in the Boy Scouts, and went to a fancy private school."\n[17](Manes 1994, p.\xa024)', metadata={'page': 7, 'source': 'pdfs\\billgates.pdf'}), Document(page_content='books?id=tsl3EVBtdxcC& pg=PA228& dq=Congregationalist+ + bill+ gates& hl=en& ei=GjuBTcCQI8uB0QHJo-yECQ& sa=X&\noi=book_result& ct=result& resnum=10& ved=0CFwQ6AEwCQ#v=onepage& q& f=false). Columbia University Press. . Retrieved March\n10, 2011. "Bill Gates was a member of the baby boom, born in 1955 into an upper-middle-class fam