# Simple RAG from CSV (No LangChain/LlamaIndex)

This notebook demonstrates a minimal Retrieval-Augmented Generation (RAG) pipeline using:
- **CSV** as the knowledge source
- **OpenAI embeddings** (`text-embedding-3-small`) for retrieval
- **OpenAI chat model** (`gpt-4o-mini`) for final answer generation

The implementation avoids orchestration frameworks and uses only basic Python libraries (`pandas`, `numpy`, `openai`).

In [None]:
# If needed, uncomment and run:
# %pip install -q openai pandas numpy

In [None]:
import os
import numpy as np
import pandas as pd
from openai import OpenAI

# The API key is expected in the OPENAI_API_KEY environment variable, e.g. set
# it in your shell before launching Jupyter:
#   export OPENAI_API_KEY="your_key_here"
# Stop here with a readable message if it is missing or empty.
assert os.environ.get("OPENAI_API_KEY"), "Please set OPENAI_API_KEY before running this notebook."

# Single client instance used for all embedding and chat requests in this notebook.
client = OpenAI()

## 1) Load CSV knowledge base

In [None]:
csv_path = "../data/sample_knowledge.csv"  # adjust if needed

kb_df = pd.read_csv(csv_path)

# Later cells index these columns directly; check for them here so a malformed
# CSV fails with a clear message instead of a KeyError further down.
required_columns = {"id", "title", "content"}
missing_columns = required_columns - set(kb_df.columns)
assert not missing_columns, f"{csv_path} is missing required columns: {sorted(missing_columns)}"

# Preview the first rows rather than rendering the whole knowledge base.
kb_df.head()

## 2) Build text chunks to embed

For simplicity, each row is one chunk.

In [None]:
# Build one text chunk per row from its title and content.
# Missing values are replaced with "" first: a bare ``astype(str)`` would turn
# NaN into the literal text "nan", which would then be embedded and shown to
# the model as if it were real content.
kb_df["chunk_text"] = (
    "Title: " + kb_df["title"].fillna("").astype(str) + "\n"
    + "Content: " + kb_df["content"].fillna("").astype(str)
)

kb_df[["id", "chunk_text"]].head()

## 3) Create embeddings for all chunks

In [None]:
embedding_model = "text-embedding-3-small"


def get_embedding(text: str, model: str = embedding_model) -> np.ndarray:
    """Embed a single piece of text via the OpenAI embeddings endpoint.

    Args:
        text: The text to embed.
        model: Embedding model name. The default is bound to the value of
            ``embedding_model`` at the time this function is defined.

    Returns:
        The embedding of ``text`` as a ``float32`` NumPy array.
    """
    result = client.embeddings.create(input=text, model=model)
    # Only one input was sent, so the vector of interest is the first data entry.
    vector = result.data[0].embedding
    return np.array(vector, dtype=np.float32)

# Embed every chunk (one get_embedding call per row) and store the vectors next to the text.
kb_df["embedding"] = kb_df["chunk_text"].apply(get_embedding)
print(f"Created {kb_df.shape[0]} embeddings. Vector size: {len(kb_df['embedding'].iloc[0])}")

## 4) Retrieve top-k relevant chunks for a user query

In [None]:
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity between two 1-D vectors.

    Args:
        a: First vector.
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)`` as a Python float. If either vector has
        zero norm the similarity is undefined; ``0.0`` is returned in that case
        so callers that sort by score never see NaN.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # Guard against 0/0, which would yield NaN (plus a RuntimeWarning) and make
    # any ranking built on the score unreliable.
    if denominator == 0:
        return 0.0
    return float(np.dot(a, b) / denominator)


def retrieve(query: str, k: int = 3) -> pd.DataFrame:
    """Return the ``k`` knowledge-base rows most similar to ``query``.

    The query is embedded with ``get_embedding`` and compared against each
    row's stored embedding using ``cosine_similarity``.

    Args:
        query: Natural-language question to search for.
        k: Number of top-scoring rows to return.

    Returns:
        A copy of the best-matching ``kb_df`` rows, ordered from highest to
        lowest similarity, with an added ``score`` column.
    """
    query_vector = get_embedding(query)

    def score_against_query(chunk_vector: np.ndarray) -> float:
        return cosine_similarity(query_vector, chunk_vector)

    # Work on a copy so the shared knowledge-base frame never gains a "score" column.
    candidates = kb_df.copy()
    candidates["score"] = candidates["embedding"].apply(score_against_query)
    ranked = candidates.sort_values(by="score", ascending=False)
    return ranked.iloc[:k]

# Try the retriever on a sample question and inspect which rows rank highest.
user_query = "How long does international shipping take?"
retrieved = retrieve(query=user_query, k=3)
retrieved.loc[:, ["id", "title", "score"]]

## 5) Build augmented prompt and generate answer

In [None]:
generation_model = "gpt-4o-mini"

# Join the retrieved chunks into one context string with a visible divider between them.
context = "\n\n---\n\n".join(retrieved["chunk_text"])

# The system message restricts the model to the supplied context.
system_prompt = (
    "You are a helpful assistant. "
    "Answer only from the provided context. "
    "If the answer is not in context, say you don't know."
)

# Layout: blank line, the question, blank line, "Context:", the chunks, trailing newline.
user_prompt = "\n".join(["", f"Question: {user_query}", "", "Context:", context, ""])

response = client.chat.completions.create(
    model=generation_model,
    temperature=0,  # lowest sampling temperature
    messages=[
        dict(role="system", content=system_prompt),
        dict(role="user", content=user_prompt),
    ],
)

print(response.choices[0].message.content)

## 6) Optional: wrap into one function

Use this to ask multiple questions once the embeddings have been built.

In [None]:
def ask_rag(question: str, k: int = 3) -> str:
    """Answer ``question`` using the top-``k`` retrieved knowledge-base chunks.

    Retrieves the most similar chunks with ``retrieve``, places them in the
    user message as context, and asks ``generation_model`` for an answer under
    the notebook's ``system_prompt``.

    Args:
        question: The user's question.
        k: Number of chunks to retrieve and include as context.

    Returns:
        The model's answer text, or an empty string if the response carries no
        text content.
    """
    top_docs = retrieve(question, k=k)
    context_text = "\n\n---\n\n".join(top_docs["chunk_text"].tolist())

    prompt = f"""
Question: {question}

Context:
{context_text}
"""

    answer = client.chat.completions.create(
        model=generation_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    # ``message.content`` is Optional[str] in the OpenAI SDK; normalise None to ""
    # so the declared ``-> str`` return type always holds for callers.
    return answer.choices[0].message.content or ""

# Example: answer another question with the same pipeline.
ask_rag(question="When can I get a refund?")