In [1]:
import functools
import os
import uuid

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# 📥 STORAGE: Load, split, embed, and store PDF
def store_documents(pdf_path: str, persist_directory: str = "./chroma_db"):
    """Load a PDF, split it into chunks, embed them and store them in Chroma.

    Args:
        pdf_path: Location of the PDF handed to ``PyPDFLoader``.
        persist_directory: Directory in which Chroma keeps its on-disk index.

    Returns:
        The ``Chroma`` vector store that holds the embedded chunks.

    Raises:
        ValueError: If splitting the loaded pages produced no chunks, i.e.
            there is nothing to embed.
    """
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    documents = splitter.split_documents(pages)
    if not documents:
        # Fail here with a readable message rather than deep inside the
        # embedding / vector-store call.
        raise ValueError(
            f"No text chunks could be extracted from {pdf_path!r}; nothing to store."
        )

    # Deterministic ids derived from the source path and chunk position, so
    # ingesting the same PDF again reuses the same ids instead of adding a
    # second copy of every chunk under fresh random ids.
    ids = [
        str(uuid.uuid5(uuid.NAMESPACE_URL, f"{pdf_path}#chunk-{index}"))
        for index in range(len(documents))
    ]

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = Chroma.from_documents(
        documents,
        embedding=embeddings,
        ids=ids,
        persist_directory=persist_directory,
    )
    # NOTE(review): newer Chroma releases appear to persist automatically and
    # warn on this call -- confirm against the installed version before
    # removing it.
    vectorstore.persist()
    print(f"✅ Stored {len(documents)} documents in Chroma.")
    return vectorstore


# 🔍 RETRIEVAL: Load retriever from Chroma
def get_retriever(persist_directory: str = "./chroma_db", k: int = 4):
    """Open the Chroma index in *persist_directory* and return a retriever.

    The index is opened with the same sentence-transformers embedding model
    that ``store_documents`` uses when writing it.

    Args:
        persist_directory: Directory holding the Chroma index.
        k: Number of chunks the retriever should return per query
            (defaults to 4).

    Returns:
        The retriever produced by ``Chroma.as_retriever``.

    Raises:
        ValueError: If *k* is smaller than 1.
    """
    # Validate before loading the embedding model, which is the slow part.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
    return vectorstore.as_retriever(search_kwargs={"k": k})


# 🧠 GENERATION: Answer a query with a HuggingFace Transformers model
@functools.lru_cache(maxsize=1)
def _load_llm(model_name: str):
    """Build the LangChain LLM wrapper for *model_name*, cached across calls."""
    print("⏳ Loading model (first time might take a while)...")

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

    # Create text generation pipeline
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        top_p=0.95,
        repetition_penalty=1.1,
    )
    return HuggingFacePipeline(pipeline=pipe)


def generate_answer(query: str, retriever, model_name: str = "mistralai/Mistral-7B-Instruct-v0.1"):
    """Answer *query* with a RetrievalQA chain and print the answer.

    The tokenizer, model and generation pipeline are built once per
    ``model_name`` and reused on later calls; only the most recently used
    model is kept, so switching models does not accumulate loaded weights.

    Args:
        query: The question to answer.
        retriever: Retriever that supplies context documents to the chain.
        model_name: Hugging Face model id passed to ``from_pretrained``.

    Returns:
        The chain's result mapping; the answer text is under ``"result"``
        and source documents are requested via
        ``return_source_documents=True``.

    Raises:
        ValueError: If *query* is empty or contains only whitespace.
        OSError: Propagated from model loading, e.g. when the model repo is
            gated and the current Hugging Face credentials do not grant
            access to it.
    """
    # Reject blank questions before paying for the model load.
    if not query or not query.strip():
        raise ValueError("query must be a non-empty string")

    llm = _load_llm(model_name)

    # Create RetrievalQA chain
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)
    # ``invoke`` replaces the deprecated ``Chain.__call__`` entry point.
    result = qa_chain.invoke({"query": query})

    print("\n📘 Answer:")
    print(result["result"])
    return result


# 🧪 Example Usage
if __name__ == "__main__":
    # Demo inputs: the PDF to index and the directory of the Chroma index.
    pdf_path, db_path = "./demo_pdf.pdf", "./chroma_db"

    # 1) Ingest the PDF into the vector store.
    store_documents(pdf_path, db_path)

    # 2) Re-open the persisted store as a retriever.
    retriever = get_retriever(db_path)

    # 3) Run one question through the QA chain and dump the raw result.
    question = "Summarize the key arguments presented in the document."
    answer = generate_answer(query=question, retriever=retriever)
    print(answer)


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  vectorstore.persist()


✅ Stored 3 documents in Chroma.


  vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)


⏳ Loading model (first time might take a while)...


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1.
401 Client Error. (Request ID: Root=1-681058e3-74acd02d15771a276ca6ebd9;134ccc99-6b11-4fe6-a757-92391dbe6c82)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.1 is restricted. You must have access to it and be authenticated to access it. Please log in.