In [1]:
!pip install sentence-transformers faiss-cpu rank-bm25 fastapi uvicorn httpx rapidfuzz sacrebleu rouge-score sqlalchemy

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Collecting rank-bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Collecting rapidfuzz
  Downloading rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Collecting sacrebleu
  Downloading sacrebleu-2.6.0-py3-none-any.whl.metadata (39 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from sacrebleu)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m83.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ra

In [2]:
import pandas as pd
import re
import json

df1 = pd.read_csv('/content/Natural-Questions-Base.csv')
df2 = pd.read_csv('/content/Natural-Questions-Filtered.csv')
df = pd.concat([df1, df2], ignore_index=True).drop_duplicates()

print("Dataset shape:", df.shape)

Dataset shape: (393585, 3)


In [3]:
def clean(text):
    if not isinstance(text, str):
        return ""
    text = text.replace("\n", " ").replace("\r", " ")
    return re.sub(r"\s+", " ", text).strip()


def chunk(text, max_words=160, overlap=30):
    words = text.split()
    chunks = []
    step = max_words - overlap

    for i in range(0, len(words), step):
        chunks.append(" ".join(words[i:i+max_words]))

    return chunks

In [4]:
def question_type(q):
    m = re.match(r"(who|what|when|where|why|how)", q.lower())
    return m.group(1) if m else "other"


def domain(q):
    q = q.lower()
    if any(x in q for x in ["movie", "actor", "film"]):
        return "entertainment"
    if any(x in q for x in ["city", "country", "capital"]):
        return "geography"
    if any(x in q for x in ["war", "president", "empire"]):
        return "history"
    return "general"


def difficulty(short_answers, long_answer):
    if short_answers and len(long_answer) < 300:
        return "easy"
    if short_answers and len(long_answer) >= 300:
        return "medium"
    return "hard"

In [5]:
records = []

for i, row in df.iterrows():
    q = clean(row.get("question", ""))
    la = clean(row.get("long_answers", ""))
    sa = clean(row.get("short_answers", ""))

    sa_list = [x.strip() for x in re.split(r"[;,|]", sa) if x.strip()]

    rec = {
        "id": f"nq_{i}",
        "question": q,
        "short_answers": sa_list,
        "chunks": chunk(la),
        "metadata": {
            "type": question_type(q),
            "domain": domain(q),
            "difficulty": difficulty(sa_list, la)
        }
    }

    records.append(rec)

print("Processed records:", len(records))

Processed records: 393585


In [6]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

model = SentenceTransformer("distiluse-base-multilingual-cased-v2")

texts = []
meta = []

for r in records:
    for c in r["chunks"]:
        texts.append(c)
        meta.append({
            "id": r["id"],
            "question": r["question"],
            "chunk": c,
            "metadata": r["metadata"]
        })

print("Embedding", len(texts), "chunks...")
vectors = model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)

dim = vectors.shape[1]
index = faiss.IndexFlatIP(dim)
index.add(vectors.astype("float32"))

print("FAISS ready.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/341 [00:00<?, ?B/s]



config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/610 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/539M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/100 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/531 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/114 [00:00<?, ?B/s]

2_Dense/model.safetensors:   0%|          | 0.00/1.58M [00:00<?, ?B/s]

Embedding 629689 chunks...
FAISS ready.


In [7]:
from rank_bm25 import BM25Okapi

tokenized = [re.findall(r"\w+", t.lower()) for t in texts]
bm25 = BM25Okapi(tokenized)

In [8]:
def normalize_query(q):
    q = q.lower()
    return re.sub(r"\s+", " ", q).strip()

def expand_query(q):
    expansions = {
        "usa": "united states america",
        "ai": "artificial intelligence"
    }
    for k, v in expansions.items():
        if k in q:
            q += " " + v
    return q

In [9]:
def retrieve(query, top_k=5):
    query = normalize_query(query)
    query = expand_query(query)

    qv = model.encode([query], normalize_embeddings=True)
    scores, idxs = index.search(qv.astype("float32"), top_k)

    tokens = re.findall(r"\w+", query.lower())
    bm25_scores = bm25.get_scores(tokens)
    max_bm = max(bm25_scores) if max(bm25_scores) > 0 else 1

    combined = []
    for idx, score in zip(idxs[0], scores[0]):
        hybrid_score = 0.7 * score + 0.3 * (bm25_scores[idx] / max_bm)
        combined.append((idx, hybrid_score))

    combined.sort(key=lambda x: x[1], reverse=True)

    return [meta[i] for i, _ in combined[:top_k]]

In [10]:
import requests
import os

def generate_answer(question, contexts):
    context_text = "\n\n".join([c["chunk"] for c in contexts])

    prompt = f"""
    Answer strictly using the context below.

    Context:
    {context_text}

    Question:
    {question}
    """

    response = requests.post(
        "https://api.groq.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {os.getenv('GROQ_API_KEY')}"},
        json={
            "model": "llama3-70b-8192",
            "messages": [{"role": "user", "content": prompt}]
        }
    )

    return response.json()

In [11]:
from functools import lru_cache

@lru_cache(maxsize=500)
def cached_retrieve(query):
    return retrieve(query)

In [12]:
from rapidfuzz import fuzz
import numpy as np
import sacrebleu
from rouge_score import rouge_scorer


def precision_at_k(contexts, gold, k=5):
    if not gold:
        return 0

    hits = 0
    for c in contexts[:k]:
        for g in gold:
            if fuzz.partial_ratio(c["chunk"].lower(), g.lower()) > 80:
                hits += 1
                break

    return hits / k


def bleu_rouge(hypothesis, gold):
    if not gold:
        return None

    bleu = sacrebleu.corpus_bleu([hypothesis], [gold]).score
    scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
    rougeL = max(scorer.score(hypothesis, g)["rougeL"].fmeasure for g in gold)

    return {"BLEU": bleu, "ROUGE-L": rougeL}

In [13]:
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class QuestionRequest(BaseModel):
    question: str
    top_k: int = 5


@app.post("/ask-question")
def ask_question(req: QuestionRequest):
    docs = retrieve(req.question, req.top_k)
    answer = generate_answer(req.question, docs)
    return {"answer": answer}


@app.get("/health")
def health():
    return {"status": "ok"}

In [18]:
%%writefile main.py

from fastapi import FastAPI

app = FastAPI()

@app.get("/")
def root():
    return {"message": "API working"}

Writing main.py


In [20]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.5.0


In [39]:
public_url = ngrok.connect(8000)
print("🚀 Public FastAPI URL:", public_url)
!uvicorn main:app --host 0.0.0.0 --port 8000 --reload --log-level info

🚀 Public FastAPI URL: NgrokTunnel: "https://de9c-34-143-176-202.ngrok-free.app" -> "http://localhost:8000"
[32mINFO[0m:     Will watch for changes in these directories: ['/content']
[32mINFO[0m:     Uvicorn running on [1mhttp://0.0.0.0:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     Started reloader process [[36m[1m27562[0m] using [36m[1mWatchFiles[0m
[32mINFO[0m:     Started server process [[36m27568[0m]
[32mINFO[0m:     Waiting for application startup.
[32mINFO[0m:     Application startup complete.
[32mINFO[0m:     41.36.16.221:0 - "[1mGET /docs HTTP/1.1[0m" [32m200 OK[0m
[32mINFO[0m:     41.36.16.221:0 - "[1mGET /openapi.json HTTP/1.1[0m" [32m200 OK[0m




[32mINFO[0m:     Shutting down
[32mINFO[0m:     Waiting for application shutdown.
[32mINFO[0m:     Application shutdown complete.
[32mINFO[0m:     Finished server process [[36m27568[0m]
[32mINFO[0m:     Stopping reloader process [[36m[1m27562[0m]


In [38]:
from pyngrok import ngrok

ngrok.set_auth_token("32lBaLkWSAbaa3pN0QOLq7UH0xe_uoJ6vrzVhwp794o59qM2")

public_url = ngrok.connect(8000)
print(public_url)

NgrokTunnel: "https://a7ef-34-143-176-202.ngrok-free.app" -> "http://localhost:8000"


In [31]:
!uvicorn main:app --reload

[32mINFO[0m:     Will watch for changes in these directories: ['/content']
[32mINFO[0m:     Uvicorn running on [1mhttp://127.0.0.1:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     Started reloader process [[36m[1m24204[0m] using [36m[1mWatchFiles[0m
[32mINFO[0m:     Started server process [[36m24206[0m]
[32mINFO[0m:     Waiting for application startup.
[32mINFO[0m:     Application startup complete.
[32mINFO[0m:     41.36.16.221:0 - "[1mGET /docs HTTP/1.1[0m" [32m200 OK[0m
[32mINFO[0m:     41.36.16.221:0 - "[1mGET /openapi.json HTTP/1.1[0m" [32m200 OK[0m
[32mINFO[0m:     41.36.16.221:0 - "[1mGET / HTTP/1.1[0m" [32m200 OK[0m




[32mINFO[0m:     Shutting down
[32mINFO[0m:     Waiting for application shutdown.
[32mINFO[0m:     Application shutdown complete.
[32mINFO[0m:     Finished server process [[36m24206[0m]
[32mINFO[0m:     Stopping reloader process [[36m[1m24204[0m]


In [37]:
from fastapi import FastAPI

app = FastAPI()

@app.get("/predict")
def predict(text: str):
    return {"received_text": text}

In [46]:
query = "Who is Nikola Tesla?"
docs = retrieve(query, top_k=3)

for d in docs:
    print("\n--- Retrieved Chunk ---")
    print(d["chunk"])


--- Retrieved Chunk ---
<Li> Nicola Peltz as Katara </Li>

--- Retrieved Chunk ---
<Ul> <Li> Nicola Peltz as Katara </Li> </Ul>

--- Retrieved Chunk ---
Natasha Lynne as Nicky Nichols, inmate
