In [10]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface.llms import HuggingFaceEndpoint
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
import os



In [11]:
# Embedding model served through the Hugging Face Inference API.
model = "sentence-transformers/all-MiniLM-L6-v2"

# Fail fast with an actionable message when the token is missing or empty,
# instead of handing None to the embeddings client.
hf_api_key = os.environ.get('hf_KEY')
if not hf_api_key:
    raise EnvironmentError(
        "Environment variable 'hf_KEY' is not set; "
        "export your Hugging Face API token before running this notebook."
    )

hf_embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=hf_api_key,
    model_name=model,
)

In [6]:
# Load the source PDF; the path is relative to the notebook's working directory.
loader = PyPDFLoader("doc.pdf")
docs = loader.load()

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
documents = text_splitter.split_documents(docs)


In [None]:
# Give every chunk a deterministic id (source, page, position in the chunk list).
# Without explicit ids, each run of this cell stores the chunks again under new
# ids in the persisted collection, so retrieval starts returning duplicates.
# NOTE(review): relies on Chroma writing records with an existing id in place
# (upsert) -- confirm against the installed langchain_community version.
chunk_ids = [
    f"{doc.metadata.get('source', 'doc')}:{doc.metadata.get('page', 0)}:{position}"
    for position, doc in enumerate(documents)
]

# Embed the chunks and persist them to the on-disk collection.
db = Chroma.from_documents(
    documents=documents,
    embedding=hf_embeddings,
    ids=chunk_ids,
    collection_name="name_embeddings",
    persist_directory='./chromadb',
)

In [88]:
# Prompt template for grounded question answering.
# Placeholders: {context} receives the retrieved document text, {input} the user question.
# The stray characters that followed the closing parenthesis were removed; they made
# this cell a SyntaxError.
prompt = ChatPromptTemplate.from_template("""
Answer ONLY using the context below. If unsure, say "I don't know".

CONTEXT:
{context}

QUESTION: 
{input}

ANSWER (no markdown):
""")

In [89]:
# Text-generation model accessed through a Hugging Face endpoint.
# The API token is read from the 'hf_KEY' environment variable (None when unset).
llm = HuggingFaceEndpoint(
    huggingfacehub_api_token=os.getenv('hf_KEY'),
    repo_id="google/gemma-2-2b-it",
    task="text-generation",
)


In [90]:
# Chain that formats the prompt with the supplied documents and calls the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Expose the vector store as a retriever (default search settings).
retriever = db.as_retriever()

# Full pipeline: retrieve documents for the question, then run the document chain on them.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)


In [92]:
# Ask a question through the retrieval chain; the question goes in under the "input" key.
response = retrieval_chain.invoke(
    {"input": "Who is Vishnu Sharma?"}
)

# Display only the generated answer text as the cell output.
response["answer"]




'Vishnu Sharma is the author of the Panchatantra, a collection of fables. He was a pandit, a learned scholar in ancient India. His works were translated by G. L. Chandiramani.'