In [1]:
!pip install sentence-transformers faiss-cpu transformers torch PyPDF2




In [2]:
import os
import time
import faiss
import numpy as np
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch


In [3]:
# Sentence-embedding model shared by ingest_document (to embed document chunks)
# and retrieve_chunks (to embed the query).
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")




Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [4]:
# Width of the vectors stored in the index. Assumed to equal the embedding size
# produced by embedding_model — TODO confirm whenever the embedding model changes.
DIMENSION = 384
# FAISS L2-distance index that receives one vector per ingested chunk.
index = faiss.IndexFlatL2(DIMENSION)
# Chunk texts, appended in the same order as their vectors are added to `index`;
# retrieve_chunks looks chunks up here using the ids returned by index.search.
document_chunks = []


In [5]:
def chunk_text(text, chunk_size=500, overlap=100):
    """Split ``text`` into overlapping, fixed-width character windows.

    Consecutive windows start ``chunk_size - overlap`` characters apart, so
    each window repeats the last ``overlap`` characters of the previous one.
    The final window(s) may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must be
            smaller than ``chunk_size``.

    Returns:
        A list of chunk strings (empty when ``text`` is empty).

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size``. Without this check the window would
            never advance and the function would loop forever.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer.")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size.")

    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]


In [6]:
def ingest_document(file_path):
    """Read a PDF or TXT file, chunk it, embed the chunks and index them.

    The chunk embeddings are added to the module-level FAISS ``index`` and the
    chunk texts are appended to ``document_chunks`` in the same order, so that
    vector ids returned by the index map onto positions in that list.

    Args:
        file_path: Path to a ``.pdf`` or ``.txt`` file (extension match is
            case-insensitive). TXT files are read as UTF-8.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the extension is unsupported, or if the file yields no
            text (e.g. an empty file or a PDF without a text layer).
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError("File not found. Check the path.")

    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        reader = PdfReader(file_path)
        # Some pages may yield no text; treat a None result as empty rather
        # than failing on None + str.
        text = "".join((page.extract_text() or "") + " " for page in reader.pages)
    elif lowered_path.endswith(".txt"):
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    else:
        raise ValueError("Only PDF or TXT files are supported.")

    # Stop before embedding: an empty chunk list cannot be added to the index
    # and whitespace-only chunks would only pollute retrieval results.
    if not text.strip():
        raise ValueError("No extractable text found in the document.")

    chunks = chunk_text(text)
    embeddings = np.asarray(embedding_model.encode(chunks), dtype="float32")

    # Both stores are mutated in place (not rebound), so no `global` is needed.
    index.add(embeddings)
    document_chunks.extend(chunks)

    print(f"✅ Ingested {len(chunks)} chunks")


In [7]:
def retrieve_chunks(query, top_k=3):
    """Return the stored chunks whose embeddings are closest to ``query``.

    Args:
        query: The question or search text to embed.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most ``top_k`` chunk strings in the order returned by the
        index search. Empty when nothing has been ingested yet or ``top_k`` is
        not positive.
    """
    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with id -1, which would silently select document_chunks[-1].
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    query_embedding = np.asarray(embedding_model.encode([query]), dtype="float32")
    _, indices = index.search(query_embedding, k)

    # Keep the -1 filter as a safety net for any padded slots.
    return [document_chunks[i] for i in indices[0] if i >= 0]


In [8]:
# Hugging Face checkpoint used for answer generation.
MODEL_NAME = "google/flan-t5-base"
# Tokenizer and seq2seq model are loaded once when this cell runs and are then
# reused by every call to local_llm_answer.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

def local_llm_answer(context, question):
    """Generate an answer to ``question`` from ``context`` with the local model.

    The prompt places the context before the question. Because tokenizer
    truncation cuts from the end of the input, an over-long context would
    otherwise push the question and the "Answer:" cue out of the model input.
    To avoid that, the context alone is shortened to the token budget left
    after the fixed prompt text and the question.

    Args:
        context: Retrieved document text to ground the answer in.
        question: The user's question.

    Returns:
        The decoded model output with special tokens removed.
    """
    instructions = (
        "\nUse the context below to answer the question.\n"
        'If the answer is not in the context, say "Not found in document".\n'
        "\nContext:\n"
    )
    closing = f"\n\nQuestion:\n{question}\n\nAnswer:\n"

    # Tokenizers without a configured limit report a huge sentinel value; in
    # that case nothing would be truncated, so the context is left untouched.
    limit = getattr(tokenizer, "model_max_length", None)
    if limit and limit < 1_000_000:
        overhead = len(tokenizer(instructions + closing)["input_ids"])
        budget = max(limit - overhead, 0)
        context_ids = tokenizer(context, add_special_tokens=False)["input_ids"]
        if len(context_ids) > budget:
            context = tokenizer.decode(context_ids[:budget], skip_special_tokens=True)

    prompt = instructions + context + closing

    # truncation=True stays as a backstop (e.g. for an extremely long question).
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


Loading weights:   0%|          | 0/282 [00:00<?, ?it/s]



In [9]:
def ask_question(question):
    """Answer ``question`` from the ingested documents and time the round trip.

    Retrieves the most relevant chunks, joins them with newlines into a single
    context string and passes that to the local language model.

    Args:
        question: The user's question.

    Returns:
        A ``(answer, latency)`` tuple where ``latency`` is the elapsed time in
        seconds covering retrieval and generation.
    """
    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    chunks = retrieve_chunks(question)
    context = "\n".join(chunks)
    answer = local_llm_answer(context, question)

    latency = time.perf_counter() - start_time
    return answer, latency


In [10]:
# Build the path from the current user's home directory instead of hardcoding
# one machine's absolute location; change the file name to ingest another document.
DOCUMENT_PATH = os.path.join(
    os.path.expanduser("~"), "Downloads", "SangeethaJames_InternshalaResume.pdf"
)
ingest_document(DOCUMENT_PATH)


✅ Ingested 4 chunks


In [12]:
# Run one end-to-end query and report the generated answer with its timing.
answer, latency = ask_question("What are my skills?")
print(f"Answer: {answer}")
print(f"Latency: {round(latency, 2)} seconds")


Answer: Python, SQL, NumPy, Pandas, Matplotlib, Seaborn, Scikit-learn, Power BI, Jupyter Notebook, VS Code, Machine Learning, Deep Learning, Natural Language Processing (NLP)
Latency: 2.78 seconds
