In [17]:
pdf_file_path='../Documents/py.pdf'

In [18]:
embedding_model_path='../MODELS/Sentence-Similarity/sentence-transformers_all-MiniLM-L6-v2'
text_gen_model_path='../MODELS/Text-Generation/Qwen_Qwen2.5-1.5B-Instruct/'

In [19]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import pipeline

In [20]:
def load_pdf_text(pdf):
    loader=PyPDFLoader(pdf)
    pages=loader.load()
    return " ".join([p.page_content for p in pages])

In [21]:
def text_chunks(text):
    chunks=RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50
    )
    return chunks.split_text(text)

In [22]:
pdf_loader=load_pdf_text(pdf_file_path)
pdf_text_chunks=text_chunks(pdf_loader)

In [23]:
embeddings=HuggingFaceEmbeddings(
    model_name=embedding_model_path
)

In [24]:
vectorStore=FAISS.from_texts(pdf_text_chunks, embeddings)

In [25]:
retriever=vectorStore.as_retriever()

In [26]:
hf_pipeline=pipeline(
    'text-generation',
    model=text_gen_model_path
)

Device set to use cpu


In [27]:
llm=HuggingFacePipeline(
    pipeline=hf_pipeline
)

In [28]:
qa_chain=RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff"
)

In [29]:
print("📄 Document QA Ready! Type your question. Type 'exit' to quit.\n")

while True:
    question= input("You: ")
    if question.lower() in ['exit', 'quit']:
        print("Existing QA.")
        break
    answer=qa_chain.invoke(question)
    print(f'Answer: {answer}')

📄 Document QA Ready! Type your question. Type 'exit' to quit.

Answer: {'query': 'What is python?', 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nIntroduction to Python: \nPython is a widely used general -purpose, high level programming language. It was created by Guido \nvan Rossum in 1991 and further developed by the Python Software Foundation. It was designed with \nan emphasis on code readability, and its syntax allows programmers to express their concepts in \nfewer lines of code. \nPython is a programming language that lets you work quickly and integrate systems more efficiently.\n\nThere are two major Python versions: Python 2 and Python 3. Both are quite different.  \nBeginning with Python programming: \n1) Finding an Interpreter:\nBefore we start Python programming, we need to have an interpreter to interpret and run our \nprograms. http://ideone.c