In [None]:
!pip install -q faiss-cpu sentence-transformers openai PyPDF2
!pip install -q PyPDF2

In [None]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import PyPDF2

In [None]:
# Smoke test: reaching the print means the faiss package imported in this runtime.
import faiss
print("FAISS working")

In [None]:
from getpass import getpass
import os
from openai import OpenAI

# Prompt for the key without echoing it. Strip whitespace/newlines that tend to
# come along when pasting, and fail fast on an empty entry instead of letting
# the first API request fail with an authentication error.
_api_key = getpass("Enter API key: ").strip()
if not _api_key:
    raise ValueError("No API key entered.")

os.environ["OPENAI_API_KEY"] = _api_key
del _api_key  # do not keep the secret around in a notebook variable

# No key is passed explicitly; the client is expected to pick it up from the
# OPENAI_API_KEY environment variable set above.
client = OpenAI()

In [None]:
from google.colab import files

# Ask the user to upload file(s); the keys of `uploaded` are used as file names below.
uploaded = files.upload()

In [None]:
import PyPDF2

def extract_pdf_text(file_name):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_name: Path of the PDF file to read.

    Returns:
        A single string holding each page's extracted text followed by a
        newline. A page that yields no text contributes only the newline.
    """
    with open(file_name, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Guard against a missing (None) result so one unreadable page cannot
        # abort the whole extraction with a TypeError. Joining once also avoids
        # rebuilding the string for every page.
        return "".join((page.extract_text() or "") + "\n" for page in reader.pages)

# Get uploaded filename (only the first uploaded file is used)
if not uploaded:
    # A cancelled or empty upload would otherwise surface as a bare IndexError.
    raise ValueError("No file was uploaded; re-run the upload cell and choose a PDF.")
pdf_name = next(iter(uploaded))

# Extract
pdf_text = extract_pdf_text(pdf_name)

# Show only the first 1000 characters as a sanity check of the extraction.
print("Preview:\n")
print(pdf_text[:1000])


In [None]:
# Corpus to index: a list of document strings (here only the extracted PDF text).
documents = [pdf_text]

In [None]:
# Embedding model; the same instance encodes both the chunks and the queries.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Simple chunking
def chunk_text(text, chunk_size=200, overlap=40):
    """Split text into overlapping chunks of whitespace-separated words.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk.
        overlap: Number of words shared between consecutive chunks.

    Returns:
        A list of chunks, each a space-joined run of words. Empty when the
        text contains no words.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``chunk_size``; the
            window could then never advance.
    """
    step = chunk_size - overlap
    if step <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    words = text.split()
    chunks = []
    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        # Once a window reaches the end of the text, any further window would
        # contain only words already covered by this one, so stop here.
        if end >= len(words):
            break

    return chunks


# Apply to documents
chunks = []
for doc in documents:
    chunks.extend(chunk_text(doc))

print("Total chunks:", len(chunks))

# A document with no extractable text (e.g. an image-only PDF) produces no
# chunks; stop with a clear message instead of passing an empty batch on.
if not chunks:
    raise ValueError("No text could be extracted from the document(s); nothing to embed.")


# Embed
# FAISS is fed float32 vectors, so convert once here.
embeddings = np.asarray(model.encode(chunks), dtype="float32")

In [None]:
# Build a FAISS IndexFlatL2 sized to the embedding width and add every chunk vector.
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings)

# Reports len(chunks), i.e. the number of chunk strings that were embedded.
print("Indexed chunks:", len(chunks))

In [None]:
def retrieve(query, k=3):
    """Return up to ``k`` chunks returned by the index search for ``query``.

    Args:
        query: The question text to embed and search with.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunk strings in the order the index returned them. It is
        shorter than ``k`` when the index holds fewer than ``k`` vectors.
    """
    q_emb = model.encode([query]).astype("float32")
    _, indices = index.search(q_emb, k)

    # FAISS pads the result with -1 when fewer than k neighbours exist; using
    # that as a list index would silently return the last chunk instead.
    return [chunks[i] for i in indices[0] if i >= 0]

In [None]:
# System message for the chat model: restricts answers to the supplied context
# and fixes the exact reply to give when the context does not hold the answer.
SYSTEM_PROMPT = """
You are a QA assistant.
Answer ONLY using the provided context.
If the answer is not in the context, reply:

"I don't know based on the provided documents."
"""

def ask(query):
    """Answer ``query`` with gpt-4o-mini using the retrieved chunks as context.

    The chunks returned by ``retrieve`` are joined with newlines and sent,
    together with the question, in a single chat-completion request.

    Args:
        query: The user's question.

    Returns:
        The message content of the first choice in the response.
    """
    context_text = "\n".join(retrieve(query))
    user_prompt = f"Context:\n{context_text}\n\nQuestion:{query}"

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content


In [None]:
# Interactive chat loop: type "exit" or "quit" (or send EOF / interrupt at the
# prompt) to stop.
while True:
    try:
        q = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End the session quietly instead of leaving a traceback in the output.
        break
    if not q:
        # A blank line carries no question; skip it rather than spend an API call.
        continue
    if q.lower() in ("exit", "quit"):
        break
    print("Bot:", ask(q))
