# Smart Academic Advisor – RAG Evaluation Notebook

This notebook is dedicated to evaluating the retrieval performance of the Smart Academic Advisor
RAG system. It includes:
- Positive test questions
- Negative (out-of-scope) questions
- Hit@K accuracy computation (a question counts as a hit when an expected keyword appears in the top-K retrieved texts)
- Qualitative inspection of retrieved results

This notebook does NOT modify the RAG pipeline.
It only evaluates the output of the retriever.


In [None]:
!pip install faiss-cpu sentence-transformers


Collecting faiss-cpu
  Downloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.7 kB)
Downloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m112.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.0


In [None]:
# Mount Google Drive so the files under /content/drive/MyDrive (dataset JSON and
# FAISS index used by the cells below) become readable from this runtime.
# This only works inside Google Colab; the mount must succeed before any later
# cell opens a /content/drive/... path.
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
# Load the processed advisor dataset (a JSON file on the mounted Drive).
# `json` is imported in this cell so it runs on a fresh kernel: the notebook's
# main import cell only appears further down.
import json

data_path = "/content/drive/MyDrive/DAB_RAG_ZakyProject/data/processed/academic_advisor_rag_dataset.json"

with open(data_path, "r", encoding="utf-8") as f:
    data = json.load(f)

print("Dataset loaded ✅")
print("Total records:", len(data))
# Only peek at the first record when there is one, so an empty dataset does not raise.
if data:
    print("First record keys:", data[0].keys())

In [None]:
import json
import faiss
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used by `search` to encode each query.
# NOTE(review): presumably the same model that produced the vectors stored in the
# FAISS index — confirm, since query and index embeddings are only comparable when
# they come from the same model.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print("Embedding model loaded ✅")


In [None]:
# Load the dataset records into `data`; `search` looks records up in `data` by the
# positions FAISS returns.
# This repeats the load from the earlier cell (same path); `json` is in scope here
# because it is imported in the cell above.
data_path = "/content/drive/MyDrive/DAB_RAG_ZakyProject/data/processed/academic_advisor_rag_dataset.json"
with open(data_path, "r", encoding="utf-8") as f:
    data = json.load(f)

print("Dataset loaded ✅")
print("Total records:", len(data))



In [None]:
# Read the prebuilt FAISS index from Drive.
# NOTE(review): `search` uses each returned vector position as an index into `data`,
# so `index.ntotal` is expected to equal `len(data)` — confirm by comparing the
# count printed here with the "Total records" count printed by the dataset cell.
index_path = "/content/drive/MyDrive/DAB_RAG_ZakyProject/rag_artifacts/academic_faiss.index"
index = faiss.read_index(index_path)

print("FAISS index loaded ✅")
print("Total vectors in index:", index.ntotal)


In [None]:
def search(query, top_k=5, filters=None):
    """Retrieve the records most similar to ``query`` from the FAISS index.

    Relies on the notebook globals ``model`` (query encoder), ``index`` (FAISS
    index) and ``data`` (records addressed by the positions FAISS returns).

    Args:
        query: Natural-language question to embed and search for.
        top_k: Number of nearest neighbours requested from the index.
        filters: Optional mapping of record field -> required value. Records
            whose field differs are dropped *after* retrieval, so fewer than
            ``top_k`` results may be returned.

    Returns:
        A list of dicts with the keys ``score``, ``text``, ``program``,
        ``level`` and ``category``, in the order returned by the index.
    """
    query_embedding = model.encode([query], normalize_embeddings=True)
    scores, indices = index.search(query_embedding, top_k)

    results = []
    for score, idx in zip(scores[0], indices[0]):
        idx = int(idx)
        # FAISS pads the result with -1 when it finds fewer than top_k
        # neighbours; data[-1] would silently return the last record instead.
        if idx < 0:
            continue
        record = data[idx]

        # Drop the record as soon as one filter field does not match.
        if filters and any(record.get(k) != v for k, v in filters.items()):
            continue

        results.append({
            "score": float(score),
            "text": record.get("text", ""),
            "program": record.get("program"),
            "level": record.get("level"),
            "category": record.get("category"),
        })

    return results


In [None]:
# Qualitative check: print the top-3 retrieved chunks for one admission question.
q = "What are the admission requirements for the Accountancy bachelor program?"
results = search(q, top_k=3)

print("QUESTION:", q, "=" * 80, sep="\n")

for i, r in enumerate(results, start=1):
    print(f"--- Result {i} ---")
    print(f"Score: {round(r['score'], 3)}")
    print(f"Program: {r['program']} | Category: {r['category']} | Level: {r['level']}")
    # Show at most the first 600 characters of the chunk.
    print(f"{r['text'][:600]} ...")
    print("-" * 80)


In [None]:
# Retrieve a wider candidate set (top 10), then keep only the hits that mention
# "account" in either the chunk text or the program name, and show the first three.
query = "How many total units are required to graduate from the B.S. in Accountancy?"
results = search(query, top_k=10)

filtered = [
    r
    for r in results
    if any(
        "account" in field
        for field in (r["text"].lower(), str(r["program"]).lower())
    )
]

print("QUESTION:", query, "=" * 80, sep="\n")

for i, r in enumerate(filtered[:3], start=1):
    print(f"--- Result {i} ---")
    print(f"Score: {round(r['score'], 3)}")
    print(f"Program: {r['program']} | Category: {r['category']} | Level: {r['level']}")
    print(f"{r['text'][:600]} ...")
    print("-" * 80)


In [None]:
# Same question with a stricter post-filter: from the top 30 hits keep only chunks
# whose text contains the exact total-units phrase, whose program mentions
# "account" and whose category mentions "graduation". Missing program/category
# values are treated as empty strings.
query = "How many total units are required to graduate from the B.S. in Accountancy?"
results = search(query, top_k=30)

filtered = [
    r
    for r in results
    if all(
        needle in haystack
        for needle, haystack in (
            ("total units required for the b.s. degree", r["text"].lower()),
            ("account", (r["program"] or "").lower()),
            ("graduation", (r["category"] or "").lower()),
        )
    )
]

print("QUESTION:", query, "=" * 80, sep="\n")

for i, r in enumerate(filtered[:3], start=1):
    print(f"--- Filtered Result {i} ---")
    print(f"Score: {round(r['score'], 3)}")
    print(f"Program: {r['program']} | Category: {r['category']} | Level: {r['level']}")
    # Longer preview (1200 characters) than the earlier demo cells.
    print(f"{r['text'][:1200]} ...")
    print("-" * 80)


In [None]:
def test_question(question, top_k=5):
    print("QUESTION:")
    print(question)
    print("=" * 80)

    results = search(question, top_k=top_k)

    for i, r in enumerate(results, start=1):
        print(f"--- Result {i} ---")
        print("Score:", round(r["score"], 3))
        print("Program:", r["program"], "| Category:", r["category"], "| Level:", r["level"])
        print(r["text"][:700], "...")
        print("-" * 80)


In [None]:
# Spot-check: print the retrieved chunks (default top_k of 5) for an
# admission-requirements question.
test_question("What are the admission requirements for the Accountancy bachelor program?")


In [None]:
# Evaluation questions for the retriever. Each item has:
#   "q"                 - the question sent to the retriever
#   "expected_keywords" - strings that `hit_at_k` looks for (case-insensitively) in
#                         the retrieved text; one matching keyword is enough for a hit
#   "label"             - "positive" or "negative" (out-of-scope question)
# Only "positive" items are counted by the Hit@K scoring loop further down;
# "negative" items carry an empty keyword list.
evaluation_set = [
    # ✅ Positive
    {"q": "What are the admission requirements for the Accountancy bachelor program?",
     "expected_keywords": ["GPA", "3.2", "3.0", "Pre-Accountancy", "60 units"], "label": "positive"},
    {"q": "How many total units are required to graduate from the B.S. in Accountancy?",
     "expected_keywords": ["Total Units Required", "120", "122"], "label": "positive"},
    {"q": "Does the B.S. in Accountancy program prepare students for CPA licensure?",
     "expected_keywords": ["CPA", "ACCT 511", "ethics"], "label": "positive"},
    {"q": "What GPA is required to be admitted to the Accountancy major at CSUN?",
     "expected_keywords": ["GPA", "3.2", "3.0"], "label": "positive"},

    # ❌ Negative
    {"q": "What is the tuition fee for international students in the Accountancy program?",
     "expected_keywords": [], "label": "negative"},
    {"q": "Does the accounting department offer online night classes in summer?",
     "expected_keywords": [], "label": "negative"},
    {"q": "What is the dress code policy for accounting students?",
     "expected_keywords": [], "label": "negative"},
]


In [None]:
def hit_at_k(question, expected_keywords, k=3):
    """Return True if any expected keyword appears in one of the top-``k`` results.

    Matching is a case-insensitive substring test against the text of each
    retrieved chunk.

    Args:
        question: Query passed to ``search``.
        expected_keywords: Strings of which at least one must occur in a
            retrieved chunk for the question to count as a hit. Empty strings
            are ignored.
        k: Number of results requested from ``search``.

    Returns:
        bool: True when a keyword is found, False otherwise (including when
        ``expected_keywords`` is empty or nothing is retrieved).
    """
    results = search(question, top_k=k)
    # Check each chunk on its own: joining all chunks into one string would let
    # a multi-word keyword match across the boundary between two chunks.
    texts = [(r["text"] or "").lower() for r in results]
    keywords = [kw.lower() for kw in expected_keywords if kw]
    return any(kw in text for kw in keywords for text in texts)

# Score the positive questions only: a question is a hit when hit_at_k reports
# an expected keyword in its top-3 results.
hits, total = 0, 0

for item in evaluation_set:
    q, expected, label = item["q"], item["expected_keywords"], item["label"]

    # Negative items are not part of the Hit@3 score.
    if label != "positive":
        continue

    total += 1
    hits += 1 if hit_at_k(q, expected, k=3) else 0

# Avoid dividing by zero when the set contains no positive questions.
accuracy = 0 if total == 0 else hits / total

print(f"Hits: {hits} / {total}")
print(f"Retrieval Accuracy@3 = {accuracy*100:.2f}%")
