In [1]:
import easyocr
from pdf2image import convert_from_path
import os

pdf_path = r"D:/Research Papers/Res2Net Forgery detection approach.pdf"
output_txt = "pdf_context.txt"

pages = convert_from_path(pdf_path, dpi=300)

reader = easyocr.Reader(['en'])

with open(output_txt, "w", encoding="utf-8") as f:
    for i, page in enumerate(pages):
        print(f"Processing page {i + 1}...")
        
        image_path = f"page_{i + 1}.jpg"
        page.save(image_path, 'JPEG')

        results = reader.readtext(image_path)
        
        f.write(f"\n--- Page {i + 1} ---\n")
        for _, text, _ in results:
            f.write(text + "\n")
        
        os.remove(image_path)

print(f"✅ OCR extraction complete. Saved to: {output_txt}")

Processing page 1...
Processing page 2...
Processing page 3...
Processing page 4...
Processing page 5...
✅ OCR extraction complete. Saved to: pdf_context.txt


In [None]:
from llama_cpp import Llama
import os

openhermes_path = r"C:\GGUF\TheBloke\OpenHermes-2.5-Mistral-7B-GGUF\openhermes-2.5-mistral-7b.Q4_K_M.gguf"

OpenHermes = Llama(
    model_path=openhermes_path,
    n_gpu_layers=20,
    n_ctx=2048,
    n_batch=256,
    n_threads=6,
    use_mlock=True,
    verbose=True
)

In [3]:
with open("pdf_context.txt", "r", encoding="utf-8") as f:
    pdf_text = f.read()

In [4]:
def build_prompt(context, question):
    return f"""<|user|>
Use the following paper content to answer the question.

{context}

Question: {question}
<|assistant|>"""

In [5]:
def ask(question):
    prompt = build_prompt(pdf_text, question)
    output = OpenHermes(prompt, max_tokens=512, stop=["<|user|>"])
    return output['choices'][0]['text'].strip()

In [6]:
from sentence_transformers import SentenceTransformer
import faiss

embedder = SentenceTransformer("all-MiniLM-L6-v2")

with open("pdf_context.txt", "r", encoding="utf-8") as f:
    pdf_text = f.read()

chunk_size = 500
chunks = [pdf_text[i:i+chunk_size] for i in range(0, len(pdf_text), chunk_size)]

embeddings = embedder.encode(chunks)

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

In [7]:
def retrieve_relevant_chunks(question, top_k=3):
    q_embedding = embedder.encode([question])
    _, I = index.search(q_embedding, top_k)
    return "\n\n".join(chunks[i] for i in I[0])

def ask(question):
    context = retrieve_relevant_chunks(question)
    prompt = f"""<|user|>
Use the following context to answer the question.

{context}

Question: {question}
<|assistant|>"""
    response = OpenHermes(prompt, max_tokens=1024, stop=["<|user|>"])
    return response['choices'][0]['text'].strip()

In [8]:
while True:
    q = input("\n❓ Ask a question (or type 'exit'): ")
    if q.lower() in ['exit', 'quit']:
        break
    answer = ask(q)
    print(f"\n💬 Answer:\n{answer}")


❓ Ask a question (or type 'exit'):  What is ResNet


llama_perf_context_print:        load time =   21844.82 ms
llama_perf_context_print: prompt eval time =   21844.15 ms /   542 tokens (   40.30 ms per token,    24.81 tokens per second)
llama_perf_context_print:        eval time =   19419.82 ms /   133 runs   (  146.01 ms per token,     6.85 tokens per second)
llama_perf_context_print:       total time =   41330.45 ms /   675 tokens



💬 Answer:
ResNet is a type of neural network architecture that was introduced in the paper "Deep Residual Learning for Image Recognition" by He et al. It is designed to address the problem of degradation in network performance as the number of layers increases. ResNet achieves this by introducing a shortcut or "skip" connection between the input and output of the residual block. This allows the network to learn residual functions with respect to the input, essentially making it easier for the network to learn the desired transformation. ResNets have been widely used in various image recognition tasks and are known for their ability to handle depth and improve training efficiency.



❓ Ask a question (or type 'exit'):  What is Residual block


Llama.generate: 18 prefix-match hit, remaining 512 prompt tokens to eval
llama_perf_context_print:        load time =   21844.82 ms
llama_perf_context_print: prompt eval time =   16452.39 ms /   512 tokens (   32.13 ms per token,    31.12 tokens per second)
llama_perf_context_print:        eval time =   11102.99 ms /    75 runs   (  148.04 ms per token,     6.75 tokens per second)
llama_perf_context_print:       total time =   27580.67 ms /   587 tokens



💬 Answer:
A residual block is a neural network component that aims to address the degradation problem in deep neural networks by creating a shortcut or a "residual path" that allows the input to skip some layers and directly contribute to the output. The residual block was introduced in the ResNet architecture and has been widely used in various deep learning models since then.



❓ Ask a question (or type 'exit'):  exit
