In [1]:
!pip install transformers torch




In [2]:
kb_text = """
The Redwood City Ravens are a fictional professional football team founded in 1984.
The team is known for its aggressive defensive style and its signature red-and-black uniforms.
Their home stadium, Raven's Nest Arena, seats 60,000 fans and features state-of-the-art training facilities.

The team's current star player is quarterback Alex "Airstream" Donovan, who is known for his exceptional
passing accuracy and mobility outside the pocket. In the 2023 season, Donovan set a franchise record
with 4,800 passing yards and 39 touchdowns.

Their head coach, Marcia Thompson, has led the Ravens since 2019. Under her leadership, the team has
qualified for the playoffs three times and won the 2022 Western Division Championship. Thompson is known
for her analytical approach to play-calling and her emphasis on player development.
"""


In [3]:
from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

embed_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
embed_model = AutoModel.from_pretrained(embed_model_name)

def embed_text(text):
    inputs = embed_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = embed_model(**inputs)
    # Mean pooling
    embeddings = outputs.last_hidden_state.mean(dim=1).squeeze()
    return embeddings.numpy()


  from .autonotebook import tqdm as notebook_tqdm
2025-11-15 14:41:03.088653: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
kb_chunks = [chunk.strip() for chunk in kb_text.split("\n") if chunk.strip()]
kb_embeddings = np.vstack([embed_text(chunk) for chunk in kb_chunks])


In [5]:
def retrieve(query, k=2):
    q_emb = embed_text(query)
    sims = np.dot(kb_embeddings, q_emb) / (
        np.linalg.norm(kb_embeddings, axis=1) * np.linalg.norm(q_emb)
    )
    top_idx = sims.argsort()[::-1][:k]
    return [kb_chunks[i] for i in top_idx], sims[top_idx]


In [6]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

gen_name = "google/flan-t5-small"
gen_tokenizer = AutoTokenizer.from_pretrained(gen_name)
gen_model = AutoModelForSeq2SeqLM.from_pretrained(gen_name)

def rag_answer(query, k=2):
    retrieved, _ = retrieve(query, k)
    context = "\n\n".join(retrieved)

    prompt = f"""
    Use the context to answer the question.
    If the answer is not in the context, say so.

    CONTEXT:
    {context}

    QUESTION:
    {query}

    ANSWER:
    """

    inputs = gen_tokenizer(prompt, return_tensors="pt")
    output = gen_model.generate(**inputs, max_length=200)
    return gen_tokenizer.decode(output[0], skip_special_tokens=True), retrieved


In [7]:
q1 = "Who is the current quarterback for the Redwood City Ravens?"
ans1, ctx1 = rag_answer(q1)
ans1


'Marcia Thompson'

In [8]:
q2 = "Where did Donovan play in college?"
ans2, ctx2 = rag_answer(q2)
ans2


'University of California, Davis'

In [9]:
q3 = "How has the team performed under Coach Thompson?"
ans3, ctx3 = rag_answer(q3)
ans3


'qualified for the playoffs three times and won the 2022 Western Division Championship'