In [None]:
import easyocr
from pdf2image import convert_from_path
import os

# Source PDF to OCR and the text file that will hold the recognised text.
pdf_path = r"D:/Research Papers/Res2Net Forgery detection approach.pdf"
output_txt = "pdf_context.txt"

# Rasterise every page of the PDF at 300 DPI.
pages = convert_from_path(pdf_path, dpi=300)

# English-only recogniser, created once and reused for every page.
reader = easyocr.Reader(['en'])

with open(output_txt, "w", encoding="utf-8") as f:
    for i, page in enumerate(pages):
        page_number = i + 1
        print(f"Processing page {page_number}...")

        # The reader is given a file path, so each page is written to a
        # short-lived JPEG in the working directory first.
        image_path = f"page_{page_number}.jpg"
        try:
            page.save(image_path, 'JPEG')
            results = reader.readtext(image_path)
        finally:
            # Always clean up the scratch image, even when saving or OCR
            # fails, so an aborted run does not leave page_N.jpg behind.
            if os.path.exists(image_path):
                os.remove(image_path)

        # One header per page, then one recognised text fragment per line.
        f.write(f"\n--- Page {page_number} ---\n")
        for _, text, _ in results:
            f.write(text + "\n")

In [None]:
from llama_cpp import Llama
import os

# Location of the quantised OpenHermes 2.5 GGUF weights on the local disk.
openhermes_path = r"C:\GGUF\TheBloke\OpenHermes-2.5-Mistral-7B-GGUF\openhermes-2.5-mistral-7b.Q4_K_M.gguf"

# Load the model once; later cells call `OpenHermes(prompt, ...)` directly.
# Parameter notes follow llama-cpp-python's documented meanings.
OpenHermes = Llama(
    model_path=openhermes_path,
    # Context window in tokens: prompt plus generated reply must fit in here.
    n_ctx=2048,
    # Prompt-processing batch size.
    n_batch=256,
    # Hardware split: 20 layers offloaded to the GPU, 6 CPU threads.
    n_gpu_layers=20,
    n_threads=6,
    # Ask the OS to keep the model resident in RAM.
    use_mlock=True,
    # Emit llama.cpp's load/timing diagnostics.
    verbose=True,
)

In [None]:
# Read the whole OCR transcript back into memory for the question-answering cells.
with open("pdf_context.txt", encoding="utf-8", mode="r") as transcript:
    pdf_text = transcript.read()

In [None]:
def build_prompt(context, question):
    """Build a single-turn ChatML prompt asking `question` about `context`.

    OpenHermes-2.5 is trained on the ChatML chat format, in which each turn
    is wrapped as ``<|im_start|>{role}\\n...<|im_end|>``. The returned string
    ends with an opened assistant turn so the model's completion is the answer.

    Args:
        context: Paper text the model should base its answer on.
        question: The user's question.

    Returns:
        The fully formatted prompt string.
    """
    return (
        "<|im_start|>user\n"
        "Use the following paper content to answer the question.\n"
        "\n"
        f"{context}\n"
        "\n"
        f"Question: {question}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

In [None]:
def ask(question, max_tokens=512):
    """Answer `question` using the OCR transcript itself as context.

    The transcript (`pdf_text`) is usually far longer than the model's
    context window, so it is truncated to the number of tokens left after
    reserving room for the prompt scaffold, the question and the reply.
    Only the beginning of the paper is therefore visible to the model.

    Args:
        question: The user's question.
        max_tokens: Maximum number of tokens to generate for the answer.

    Returns:
        The model's answer with surrounding whitespace stripped.

    Raises:
        ValueError: If the question alone leaves no room for any context.
    """
    # Tokens consumed by everything except the paper text itself.
    scaffold = build_prompt("", question)
    scaffold_tokens = len(OpenHermes.tokenize(scaffold.encode("utf-8")))

    # Small safety margin for tokenisation differences at the join points.
    budget = OpenHermes.n_ctx() - max_tokens - scaffold_tokens - 8
    if budget <= 0:
        raise ValueError("Question is too long for the model's context window.")

    context_tokens = OpenHermes.tokenize(pdf_text.encode("utf-8"), add_bos=False)
    if len(context_tokens) > budget:
        # Cut on a token boundary; ignore a possibly split multi-byte character.
        context = OpenHermes.detokenize(context_tokens[:budget]).decode(
            "utf-8", errors="ignore"
        )
    else:
        context = pdf_text

    prompt = build_prompt(context, question)
    # "<|im_end|>" ends a ChatML turn; "<|user|>" is kept for the legacy template.
    output = OpenHermes(prompt, max_tokens=max_tokens, stop=["<|im_end|>", "<|user|>"])
    return output['choices'][0]['text'].strip()

In [None]:
from sentence_transformers import SentenceTransformer
import faiss

# Sentence-embedding model used for both the chunks and the questions.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Re-read the transcript so this cell works without the earlier loading cell.
with open("pdf_context.txt", "r", encoding="utf-8") as f:
    pdf_text = f.read()

# Split the transcript into consecutive, non-overlapping 500-character pieces.
chunk_size = 500
chunks = [pdf_text[i:i+chunk_size] for i in range(0, len(pdf_text), chunk_size)]

# An empty transcript would otherwise fail below with an unhelpful indexing
# error when the embedding dimension is read.
if not chunks:
    raise ValueError(
        "pdf_context.txt is empty - run the OCR cell first and check that it produced text."
    )

# One embedding vector per chunk, in the same order as `chunks`.
embeddings = embedder.encode(chunks)

# Exact (brute-force) L2 index; row i of the index corresponds to chunks[i].
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

In [None]:
def retrieve_relevant_chunks(question, top_k=3):
    """Return the transcript chunks most similar to `question`.

    Args:
        question: The user's question; embedded with the same model as the chunks.
        top_k: Maximum number of chunks to return.

    Returns:
        The selected chunks joined by blank lines, nearest first. Returns an
        empty string when the index is empty or `top_k` is not positive.
    """
    # FAISS pads missing neighbours with label -1 when k exceeds the number of
    # stored vectors; chunks[-1] would then silently return the last chunk.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return ""

    q_embedding = embedder.encode([question])
    _, I = index.search(q_embedding, k)
    return "\n\n".join(chunks[i] for i in I[0] if 0 <= i < len(chunks))

def ask(question, top_k=3, max_tokens=1024):
    """Answer `question` from the most relevant transcript chunks.

    Defining this function replaces any earlier `ask` in the kernel.

    The prompt uses the ChatML format (``<|im_start|>role ... <|im_end|>``)
    that OpenHermes-2.5 is trained on, and generation stops at the end of
    the assistant turn.

    Args:
        question: The user's question.
        top_k: Number of chunks to retrieve as context.
        max_tokens: Maximum number of tokens to generate for the answer.

    Returns:
        The model's answer with surrounding whitespace stripped.
    """
    context = retrieve_relevant_chunks(question, top_k=top_k)
    prompt = (
        "<|im_start|>user\n"
        "Use the following context to answer the question.\n"
        "\n"
        f"{context}\n"
        "\n"
        f"Question: {question}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    # "<|im_end|>" closes the assistant turn; "<|user|>" kept as a legacy guard.
    response = OpenHermes(prompt, max_tokens=max_tokens, stop=["<|im_end|>", "<|user|>"])
    return response['choices'][0]['text'].strip()

In [None]:
# Interactive question loop: keep prompting until the user types 'exit' or 'quit'.
# NOTE(review): the characters before "Ask a question" look like a mis-decoded
# emoji - confirm the notebook's encoding before changing the string.
while True:
    q = input("\n‚ùì Ask a question (or type 'exit'): ")
    # Case-insensitive match; 'quit' is accepted although the prompt only mentions 'exit'.
    if q.lower() in ['exit', 'quit']:
        break
    # Uses whichever `ask` is currently defined and keeps its reply in `answer`.
    answer = ask(q)