In [None]:
pip install sentence-transformers faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.1


In [None]:
import pandas as pd

# Load the course catalog CSV.
df = pd.read_csv("courses_detailed.csv")

# Set of every known course code. Codes are stripped because the consumers of
# this set compare against stripped strings (table cells, metadata values), so
# a code with stray surrounding whitespace would otherwise never match.
ALL_CODES = set(df["course_code"].dropna().astype(str).str.strip())
ALL_CODES.discard("")  # a blank string is never a valid course code


print(df.columns)
print(len(df), "rows")
df.head()


Index(['program_name', 'req_num', 'course_code', 'title', 'credits', 'offered',
       'description', 'grading', 'repeatable', 'prerequisites', 'extra_blocks',
       'detail_url'],
      dtype='object')
3166 rows


Unnamed: 0,program_name,req_num,course_code,title,credits,offered,description,grading,repeatable,prerequisites,extra_blocks,detail_url
0,Advanced Mathematical Methods (for Students in...,1,MATH-UA 140,Linear Algebra 1,4.0,"Fall, Spring, and Summer terms",Systems of linear equations. Gaussian eliminat...,CAS Graded,No,,,https://bulletins.nyu.edu/search/?P=MATH-UA%20140
1,Advanced Mathematical Methods (for Students in...,1,MATH-UA 148,Honors Linear Algebra,,,This honors section of Linear Algebra is a pro...,CAS Graded,No,,,https://bulletins.nyu.edu/search/?P=MATH-UA%20148
2,Advanced Mathematical Methods (for Students in...,2,MATH-UA 352,Numerical Analysis,4.0,Spring,Formerly numbered MATH-UA 252; the content has...,CAS Graded,No,,,https://bulletins.nyu.edu/search/?P=MATH-UA%20352
3,Advanced Mathematical Methods (for Students in...,2,MATH-UA 358,Honors Numerical Analysis,,,Formerly numbered MATH-UA 258; the content has...,CAS Graded,No,,,https://bulletins.nyu.edu/search/?P=MATH-UA%20358
4,Advanced Mathematical Methods (for Students in...,2,MATH-GA 2010,Numerical Methods I,,Fall,This course is part of a two-course series mea...,GSAS Graded,No,,,https://bulletins.nyu.edu/search/?P=MATH-GA%20...


In [None]:
def _fmt(value):
    """Return `value` for display, or "Not listed" when it is missing (NaN/None)."""
    return "Not listed" if pd.isna(value) else value


# docs[i] is the text that gets embedded and shown to the LLM;
# metadata[i] is the raw catalog row for the same position.
docs = []
metadata = []

for _, row in df.iterrows():
    # Missing CSV fields are NaN; formatting them directly would put the
    # literal text "nan" into the document (e.g. "Prerequisites: nan").
    doc = f"""
Program: {_fmt(row['program_name'])}
Requirement group: {_fmt(row['req_num'])}

Course: {_fmt(row['course_code'])} - {_fmt(row['title'])}
Credits: {_fmt(row['credits'])}
Typically offered: {_fmt(row['offered'])}
Prerequisites: {_fmt(row['prerequisites'])}
Repeatable: {_fmt(row['repeatable'])}
Grading: {_fmt(row['grading'])}

Description:
{_fmt(row['description'])}

Extra:
{_fmt(row['extra_blocks'])}
""".strip()

    docs.append(doc)
    metadata.append(row.to_dict())

print("Built", len(docs), "course docs.")


Built 3166 course docs.


In [None]:
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; the same object is reused later to embed queries.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# One vector per course document, cast to float32 before being handed to FAISS.
embeddings = embedder.encode(docs, convert_to_numpy=True).astype("float32")

# Flat L2 index sized to the embedding width; row i of the index corresponds
# to docs[i] / metadata[i].
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(embeddings)

print("Index contains", index.ntotal, "vectors.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Index contains 3166 vectors.


In [None]:
def retrieve_courses(query: str, k: int = 5):
    """
    Return the `k` course documents closest to `query` in embedding space.

    Parameters
    ----------
    query : str
        Free-text question or description to search for.
    k : int, default 5
        Maximum number of hits to return. Values below 1 yield an empty list.

    Returns
    -------
    list of dict
        One dict per hit, in the order returned by the index, with keys
        "score" (the distance reported by the index, so smaller means closer),
        "text" (the course document) and "meta" (the catalog row as a dict).
    """
    k = int(k)
    if k < 1:
        # Nothing was requested: skip the embedding and search work instead of
        # handing an invalid k to the index.
        return []

    q_emb = embedder.encode([query], convert_to_numpy=True).astype("float32")
    distances, indices = index.search(q_emb, k)

    results = []
    for dist, idx in zip(distances[0], indices[0]):
        idx = int(idx)  # plain int so it indexes the Python lists directly
        if idx < 0:
            # -1 marks an unfilled slot (fewer than k vectors were available).
            continue
        results.append({
            "score": float(dist),
            "text": docs[idx],
            "meta": metadata[idx],
        })
    return results


In [None]:
# Smoke-test the retriever with a free-text query and print the top hits.
results = retrieve_courses(
    "introductory data science course with no prerequisites and 4 credits",
    k=5
)

for r in results:
    m = r["meta"]
    # "score" is the distance returned by the index search (see retrieve_courses).
    print(f"Score: {r['score']:.4f}")
    print(f"{m['course_code']} - {m['title']}  ({m['credits']})")
    print("Program:", m["program_name"])
    print("Prereq:", m["prerequisites"])
    print("Offered:", m["offered"])
    print("Detail URL:", m["detail_url"])
    print("-" * 80)


Score: 0.4583
DS-UA 301 - Advanced Topics in Data Science (offered every semester)  (4)
Program: Data Science and Mathematics (BA)
Prereq: DS-UA 112 and ( MATH-UA 185 or MATH-UA 334 or MA-UY 2224 as co-requisites ) and ( CSCI-UA 473 as a co-requisite ) and restricted to Majors/Minors.
Offered: Fall and Spring
Detail URL: https://bulletins.nyu.edu/search/?P=DS-UA%20301
--------------------------------------------------------------------------------
Score: 0.4617
DS-UA 301 - Advanced Topics in Data Science (offered every semester)  (4)
Program: Data Science (BA)
Prereq: DS-UA 112 and ( MATH-UA 185 or MATH-UA 334 or MA-UY 2224 as co-requisites ) and ( CSCI-UA 473 as a co-requisite ) and restricted to Majors/Minors.
Offered: Fall and Spring
Detail URL: https://bulletins.nyu.edu/search/?P=DS-UA%20301
--------------------------------------------------------------------------------
Score: 0.4633
DS-UA 301 - Advanced Topics in Data Science (offered every semester)  (4)
Program: Computer and Da

In [None]:
def build_prompt(query: str, retrieved):
    """
    Assemble the user prompt sent to the chat model.

    Parameters
    ----------
    query : str
        The student's request, appended verbatim at the end of the prompt.
    retrieved : iterable of dict
        Retrieval hits; each item must provide "text" (the course document)
        and "meta" (a dict that may contain "course_code").

    Returns
    -------
    str
        The prompt: the sorted list of allowed course codes, the planning
        rules, the required output format, the numbered course documents and
        finally the student's request.
    """
    # String forms that a missing course code takes once passed through str()
    # (None, float NaN, pandas NA). They must not be listed as usable codes.
    missing_markers = ("nan", "none", "<na>")

    context_parts = []
    allowed_codes = set()

    for i, item in enumerate(retrieved, start=1):
        code = str(item["meta"].get("course_code", "")).strip()
        if code and code.lower() not in missing_markers:
            allowed_codes.add(code)
        context_parts.append(f"[COURSE {i}]\n{item['text']}")

    context = "\n\n".join(context_parts)
    allowed_codes_line = ", ".join(sorted(allowed_codes))

    prompt = f"""
You are an assistant helping a CAS student plan their NYU courses.

CONTEXT COURSES (you may only use these course codes):
{allowed_codes_line}

HARD RULES (you MUST follow all of them):
- Use ONLY courses present in the context above.
- You may NOT invent new course codes. If a course is not in the context,
  use a placeholder WITHOUT a code, like:
  - Course Code: "-"   Course Title: "Core humanities elective"
  - Course Code: "-"   Course Title: "Free elective"
- If a key course is clearly missing (e.g. a beginner Japanese class or a
  specific Data Science requirement), you must:
  - Use a placeholder without a code, and
  - State in the Notes that the exact course is missing from the context.
- Respect all prerequisites and co-requisites mentioned in the context.
- Put lower-level / introductory courses earlier when possible.
- Keep each semester between 14 and 18 credits (aim for 15–16).

OUTPUT FORMAT:
- Return ONLY a markdown table with columns:
  | Year | Semester | Course Code | Course Title | Credits | Notes |
- Do not write anything before or after the table.
- One row per course. Include all 8 semesters (4 years × Fall/Spring).

Context:
{context}

Student request:
{query}
""".strip()
    return prompt


In [None]:
import re

def clean_plan_markdown(plan_md: str) -> str:
    """
    Post-process the markdown plan table produced by the model.

    - Text before the first table row (a preamble or code fence) is dropped, so
      the header is always the first line starting with "|".
    - A separator row directly under the header is kept, including alignment
      separators that contain ":".
    - Rows that do not start with "|" or that have fewer than 6 cells are
      dropped; only the first 6 cells of a row are kept.
    - A course code is looked up in ALL_CODES after normalisation (surrounding
      "*" / "`" removed, any whitespace run collapsed to one space, case
      ignored). A match is written back in the catalog's spelling.
    - A code that is not in ALL_CODES, and is not already the placeholder "-"
      or empty, is replaced with "-"; the title is kept (or set to
      "Free elective" when empty) and the Notes cell records the replacement.

    Returns the cleaned table as a string. If `plan_md` has no line starting
    with "|", it is returned unchanged.
    """
    lines = plan_md.strip().splitlines()

    start = next(
        (i for i, ln in enumerate(lines) if ln.strip().startswith("|")), None
    )
    if start is None:
        return plan_md

    def _is_separator(line: str) -> bool:
        stripped = line.strip()
        return "-" in stripped and set(stripped) <= set("|-: ")

    def _normalize(code: str) -> str:
        bare = code.strip().strip("*`").strip()
        # \s also matches non-breaking spaces, which compare unequal to " ".
        return re.sub(r"\s+", " ", bare).upper()

    # normalised code -> catalog spelling (with whitespace runs collapsed)
    known = {
        _normalize(str(c)): re.sub(r"\s+", " ", str(c).strip()) for c in ALL_CODES
    }

    header = lines[start]
    rest = lines[start + 1:]
    sep = rest[0] if rest and _is_separator(rest[0]) else None
    body = rest[1:] if sep is not None else rest

    cleaned_rows = []

    for line in body:
        if not line.strip().startswith("|"):
            continue
        if _is_separator(line):
            # A stray separator is table markup, not a course row.
            continue
        cells = [c.strip() for c in line.strip().strip("|").split("|")]
        if len(cells) < 6:
            continue

        year, sem, code, title, credits, notes = cells[:6]

        catalog_code = known.get(_normalize(code))
        if catalog_code is not None:
            code = catalog_code
        elif code not in ("-", ""):
            if not title:
                title = "Free elective"
            code = "-"
            if notes:
                notes = notes + " (original code not in catalog context)"
            else:
                notes = "Original code not in catalog context"

        cleaned_rows.append(f"| {year} | {sem} | {code} | {title} | {credits} | {notes} |")

    out = [header]
    if sep is not None:
        out.append(sep)
    out.extend(cleaned_rows)
    return "\n".join(out)


In [None]:
def answer_query_with_retrieved(query: str, retrieved, model: str = "gpt-4.1-mini") -> str:
    """
    Answer `query` from an already-retrieved list of course hits.

    The prompt is built from `query` and `retrieved`, sent to `model`, and the
    model's reply is returned as-is (no post-processing).
    """
    return call_llm(build_prompt(query, retrieved), model=model)


In [None]:
from google.colab import userdata
from openai import OpenAI

# Read the API key from the Colab secret store rather than from the notebook.
api_key = userdata.get("OPENAI_API_KEY")
# Stop here with a clear message if the secret is absent or empty.
assert api_key, "No API key found in Colab userdata under 'OPENAI_API_KEY'"

# Client object used for chat-completion requests.
client = OpenAI(api_key=api_key)


In [None]:
import os

# Never hard-code the key in the notebook. Reuse the Colab secret loaded into
# `api_key` above, and only fill the environment variable when it is not
# already set, instead of overwriting it with an empty string.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = api_key


In [None]:
import openai
import os

# Module-level key for the `openai` package, read from the environment
# (None when the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")

# System message sent as the first message of every chat request in call_llm.
SYSTEM_PROMPT = (
    "You are a helpful NYU course planning assistant. "
    "Given a context of retrieved course descriptions and a user query, "
    "you propose multi-semester plans as markdown tables."
)

def call_llm(prompt: str, model: str = "gpt-4o-mini") -> str:
    """
    Send a single text prompt to the chat model and return the text response.
    `prompt` should already include instructions + retrieved courses.

    The request goes through the `client` object's chat-completions endpoint.
    The older `openai.ChatCompletion.create` interface is not available in
    openai>=1.0, which is the SDK generation this notebook imports `OpenAI`
    from.
    """
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
        max_tokens=1500,
    )
    return resp.choices[0].message.content


In [None]:
def call_llm(
    prompt: str,
    model: str = "gpt-4o-mini",
    temperature: float = 0.2,
    max_tokens: int = 1500,
) -> str:
    """
    Send a single text prompt to the chat model and return the text response.
    `prompt` should already include instructions + retrieved courses.

    Parameters
    ----------
    prompt : str
        Full user message.
    model : str
        Chat model name passed to the API.
    temperature : float, default 0.2
        Sampling temperature passed to the API.
    max_tokens : int, default 1500
        Upper bound on generated tokens. Raise it when long tables come back
        cut off mid-row.

    Returns
    -------
    str
        The content of the first choice, or "" when the API returns no text
        content, so callers can always treat the result as a string.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    content = response.choices[0].message.content
    return content if content is not None else ""


In [None]:
def answer_query(query, k=40, model="gpt-4o-mini"):
    """
    Run the full pipeline for one question: retrieve `k` courses, build the
    prompt, query `model`, and return the post-processed markdown table.
    """
    hits = retrieve_courses(query, k)
    llm_output = call_llm(build_prompt(query, hits), model=model)
    return clean_plan_markdown(llm_output)


In [None]:
plan_query = """
I am an incoming first-year student at NYU planning to major in Data Science.
Assume I start in Fall 2025, want to take about 15–16 credits per semester,
and finish in 4 years.

I want to spend one semester studying abroad in my 3rd year (for example,
Fall of junior year). During that semester, I will mainly take electives
or flexible requirements, not heavy prerequisite chains.

Using the course information in the context, propose a tentative 8-semester
plan with course codes and titles. Try to:

- Put introductory/1000-level courses in the first year.
- Respect listed prerequisites when possible.
- Ensure key prerequisite chains are completed BEFORE the study-abroad semester.
- Use the study-abroad semester mostly for electives or core requirements
  that do not break prerequisites.
- Leave placeholders like 'Core humanities elective' or 'Study abroad elective'
  where the catalog does not give enough detail.

Output the plan as a table with columns:
Year, Semester, Course Code, Course Title, Credits, Notes.
"""

print(answer_query(plan_query, k=40))


| Year | Semester | Course Code | Course Title                                   | Credits | Notes                                      |
|------|----------|-------------|------------------------------------------------|---------|--------------------------------------------|
| 1 | Fall | - | Principles of Data Science I | 4 | Introductory course for Data Science major (original code not in catalog context) |
| 1 | Fall | - | Data Structures | 4 | Introductory course for Data Science major (original code not in catalog context) |
| 1 | Fall | - | Core humanities elective | 4 | Placeholder for core requirement |
| 1 | Fall | - | Free elective | 3 | Placeholder for elective |
| 1 | Spring | - | Core humanities elective | 4 | Placeholder for core requirement |
| 1 | Spring | - | Free elective | 3 | Placeholder for elective |
| 1 | Spring | - | Free elective | 3 | Placeholder for elective |
| 2 | Fall | - | Core humanities elective | 4 | Placeholder for core requirement |
| 2 | Fall | - | F

In [None]:
plan_query = """
I am an incoming first-year student at NYU planning to major in Data Science.

Constraints:
- I have 12 IB credits already, so you can assume that some core/elective requirements
  are partially fulfilled. I don't want a super heavy load of random core classes early.
- I want to study Japanese starting from beginner level and continue it for multiple semesters.
- I want to spend one semester abroad (for example, Fall of my third year).
- I'd like to average about 15–16 credits per semester and finish in 4 years.

Using ONLY the course information in the context, propose a tentative 8-semester plan with
course codes and titles. Try to:
- Put introductory/1000-level and fundamental courses in the first year.
- Respect prerequisites where they are listed (e.g. complete math/programming before advanced DS).
- Ensure important prerequisite chains are mostly completed before the study-abroad semester.
- Use the study-abroad semester mostly for electives or flexible requirements.
- Include Japanese language courses where appropriate.
- Leave placeholders like "Core humanities elective" or "Study abroad elective" for requirements
  that are not specified in the course text.

Output the plan as a table with columns:
Year, Semester, Course Code, Course Title, Credits, Notes.
"""

print(answer_query(plan_query, k=60))


| Year | Semester | Course Code | Course Title                          | Credits | Notes                                           |
|------|----------|-------------|---------------------------------------|---------|-------------------------------------------------|
| 1 | Fall | - | Calculus II | 4 | Introductory math course (original code not in catalog context) |
| 1 | Fall | - | Intermediate Japanese I | 4 | Beginning Japanese language (original code not in catalog context) |
| 1 | Fall | - | Statistics for The Behavioral Sciences | 4 | Introductory statistics course (original code not in catalog context) |
| 1 | Fall | - | Core humanities elective | 3 | Placeholder for core requirement |
| 1 | Spring | - | Calculus II | 4 | Continuation of math course (original code not in catalog context) |
| 1 | Spring | - | Intermediate Japanese II | 4 | Continuation of Japanese language (original code not in catalog context) |
| 1 | Spring | - | Free elective | 3 | Placeholder for elective |
|

In [None]:
plan_query = """
I am an incoming first-year student at NYU in CAS, planning to double major in
Mathematics and Data Science.

Constraints:
- I have 12 IB credits already, so assume some core/elective requirements are fulfilled.
- I want to start Japanese from beginner level and continue it for several semesters.
- I want to spend one semester abroad (Fall of my third year), and during that term
  I prefer to take flexible electives or core courses rather than tight prerequisite chains.
- I want to finish both majors in 4 years, averaging about 15–16 credits per semester.

Using ONLY the course information in the context, propose a tentative 8-semester
plan with course codes and titles. Try to:
- Put introductory programming and lower-division math (Calculus I–III, Linear Algebra)
  in the first two years.
- Schedule key courses for the Math and Data Science majors (e.g. Real Analysis,
  Abstract Algebra, probability and statistics, machine learning, data science capstone, etc.).
- Respect prerequisites as described in the course descriptions whenever possible.
- Ensure that important prerequisite chains are mostly completed before the study-abroad term.
- Include Japanese language courses where appropriate.
- Use the study-abroad semester mostly for electives or core requirements that do not break prerequisites.
- Leave placeholders like "Core humanities elective", "Study abroad elective", or
  "Free elective" where the catalog does not give enough detail.

Output the plan as a table with columns:
Year, Semester, Course Code, Course Title, Credits, Notes.
"""

print(answer_query(plan_query, k=80))


In [None]:
plan_query = """
You are designing a four-year, 8-semester course plan for a first-year CAS student
majoring in Data Science at NYU.

Student constraints:
- Aiming for ~15–16 credits per semester.
- Has 12 IB credits, so some core / gen-ed requirements are already satisfied.
- Wants a standard 4-year graduation timeline.

Using ONLY the courses and information in the context, produce a detailed 8-semester
plan that:

1. Puts introductory / 1000-level and foundational courses (intro programming,
   intro data science, basic calculus / linear algebra, probability / statistics)
   in the first 3–4 semesters.
2. Respects all listed prerequisites and co-requisites whenever possible.
3. Schedules key Data Science courses (e.g. intro DS, statistics/probability,
   machine learning / data mining, core DS requirements, and a capstone) in a
   sensible order.
4. Keeps each semester between 14 and 18 credits, aiming for 15–16.
5. Includes placeholders like “Core humanities elective”, “Free elective”, or
   “Minor elective” where the catalog does not specify exact courses.
6. Clearly notes when a course is an assumption based on typical structure but
   is not explicitly described in the context.

OUTPUT FORMAT (very important):
Return ONLY a markdown table with the following columns:

| Year | Semester | Course Code | Course Title | Credits | Notes |

- Include one row per course (8 semesters total).
- Do NOT include any text outside of the table.
"""


In [None]:
!pip install gradio -q

import gradio as gr
import traceback

def rag_app(question: str, top_k: int):
    """
    Gradio callback: answer `question` and list the courses used as context.

    Parameters
    ----------
    question : str
        Text from the question box; None or blank yields a hint message.
    top_k : int
        Number of courses to retrieve (the slider value; cast with int()).

    Returns
    -------
    tuple
        (markdown, rows). `markdown` is the cleaned plan table, a hint for a
        blank question, or an error report with traceback. `rows` is a list of
        plain Python lists [score, course code, title, credits, offered,
        prerequisites, program, detail URL], empty on blank input or error.
    """
    def _cell(value) -> str:
        # Missing catalog values arrive as None / NaN; show them as blank
        # cells rather than the text "None" / "nan".
        text = str(value)
        return "" if text.lower() in ("nan", "none", "<na>") else text

    question = (question or "").strip()
    if not question:
        return "Please enter a question about your plan or courses.", []

    try:
        # 1) Retrieve once. The answer below is generated from this exact
        #    list, so the table shows the context the model actually saw and
        #    the query is not embedded and searched a second time.
        retrieved = retrieve_courses(question, k=int(top_k))

        # 2) Same steps as answer_query(): prompt -> LLM -> table clean-up.
        prompt = build_prompt(question, retrieved)
        answer = clean_plan_markdown(call_llm(prompt))

        # 3) Build a *plain* list-of-lists table (no pandas, no numpy types)
        rows = []
        for item in retrieved:
            m = item["meta"]
            rows.append([
                float(item["score"]),
                _cell(m.get("course_code", "")),
                _cell(m.get("title", "")),
                _cell(m.get("credits", "")),
                _cell(m.get("offered", "")),
                _cell(m.get("prerequisites", "")),
                _cell(m.get("program_name", "")),
                _cell(m.get("detail_url", "")),
            ])

        return answer, rows

    except Exception as e:
        # Surface the failure in the UI instead of crashing the callback.
        tb = traceback.format_exc()
        msg = f"⚠️ Python error: {e}\n\n```python\n{tb}\n```"
        return msg, []


# Column labels for the retrieved-courses table; the order matches the rows
# built by rag_app.
headers = [
    "Score (lower = more similar)",
    "Course code",
    "Title",
    "Credits",
    "Offered",
    "Prerequisites",
    "Program",
    "Detail URL",
]

demo = gr.Interface(
    fn=rag_app,
    inputs=[
        gr.Textbox(
            label="Your question",
            lines=6,
            placeholder=(
                "Example: I am an incoming first-year CAS student planning to major in "
                "Data Science. I have 12 IB credits and want about 15–16 credits per "
                "semester. Propose an 8-semester plan."
            ),
        ),
        gr.Slider(
            minimum=1,
            maximum=80,
            step=1,
            value=20,
            label="Top-K courses to retrieve",
        ),
    ],
    outputs=[
        gr.Markdown(label="RAG Answer"),
        gr.Dataframe(
            headers=headers,
            label="Retrieved courses used as context",
        ),
    ],
    title="NYU Course RAG Planner",
    # The model named here must match the default model of the answer pipeline
    # (call_llm / answer_query default to "gpt-4o-mini").
    description=(
        "Ask questions about course planning, prerequisites, and degree paths. "
        "The system retrieves relevant NYU courses with FAISS + sentence-transformers "
        "and then uses GPT-4o-mini to generate an answer based ONLY on that context."
    ),
)

# debug=True -> traceback will print in the Colab cell if anything goes wrong.
# It also keeps this cell running until interrupted, so later cells do not
# execute while the app is up.
demo.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://085bca9bf632315ecd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
# Self-assignments: they change nothing, but each one raises NameError if the
# earlier cell that defines that global has not been run in this kernel.
df = df
docs = docs
metadata = metadata
embedder = embedder
embeddings = embeddings
index = index

def rebuild_index_from_df(df_new: pd.DataFrame):
    """
    Rebuild docs, metadata, embeddings, and FAISS index
    from a new course DataFrame.

    Everything is built in local variables first and the module-level globals
    (df, docs, metadata, embeddings, index, ALL_CODES) are replaced together
    only after encoding and indexing succeeded. A failure therefore leaves the
    previous catalog fully usable.

    Parameters
    ----------
    df_new : pd.DataFrame
        New catalog. Columns missing from it are rendered as empty fields.

    Raises
    ------
    ValueError
        If `df_new` has no rows.
    """
    global df, docs, metadata, embeddings, index, embedder, ALL_CODES

    if len(df_new) == 0:
        raise ValueError("Cannot rebuild the index from an empty catalog.")

    def _fmt(value):
        # Missing values (NaN/None) would otherwise appear as the text "nan".
        return "Not listed" if pd.isna(value) else value

    catalog = df_new.copy()

    new_docs = []
    new_meta = []

    for _, row in catalog.iterrows():
        doc = f"""
Program: {_fmt(row.get('program_name', ''))}
Requirement group: {_fmt(row.get('req_num', ''))}

Course: {_fmt(row.get('course_code', ''))} - {_fmt(row.get('title', ''))}
Credits: {_fmt(row.get('credits', ''))}
Typically offered: {_fmt(row.get('offered', ''))}
Prerequisites: {_fmt(row.get('prerequisites', ''))}
Repeatable: {_fmt(row.get('repeatable', ''))}
Grading: {_fmt(row.get('grading', ''))}

Description:
{_fmt(row.get('description', ''))}

Extra:
{_fmt(row.get('extra_blocks', ''))}
""".strip()

        new_docs.append(doc)
        new_meta.append(row.to_dict())

    print("Encoding", len(new_docs), "course docs...")
    new_embeddings = embedder.encode(new_docs, convert_to_numpy=True).astype("float32")
    dim = new_embeddings.shape[1]
    new_index = faiss.IndexFlatL2(dim)
    new_index.add(new_embeddings)

    # The code whitelist must follow the catalog, otherwise plans are checked
    # against the codes of the catalog that was just replaced.
    if "course_code" in catalog.columns:
        new_codes = set(catalog["course_code"].dropna().astype(str))
    else:
        new_codes = set()

    # Commit: swap every global in one go now that nothing can fail any more.
    df = catalog
    docs = new_docs
    metadata = new_meta
    embeddings = new_embeddings
    index = new_index
    ALL_CODES = new_codes
    print("New index contains", index.ntotal, "vectors.")


In [None]:
import io
import gradio as gr

def load_new_catalog(file):
    """
    Gradio callback for the catalog upload tab.

    Reads the uploaded CSV (located via `file.name`), rebuilds the retrieval
    index from it and returns `(status_message, preview_frame)`. The preview
    holds the first 10 rows; it is an empty DataFrame when nothing was
    uploaded or when loading failed.
    """
    if file is None:
        return "No file uploaded yet.", pd.DataFrame()

    try:
        catalog = pd.read_csv(file.name)
        rebuild_index_from_df(catalog)

        status = "".join([
            f"Loaded new catalog with {len(catalog)} rows.\n\n",
            "The RAG index has been rebuilt. New queries will use this data.",
        ])
        return status, catalog.head(10)

    except Exception as err:
        # Any failure is reported in the status panel.
        return f"❌ Error loading file: {err}", pd.DataFrame()


In [None]:
# Two-tab app: one tab to query the planner, one to replace the catalog.
with gr.Blocks() as demo:
    gr.Markdown("# NYU Course RAG Planner")

    with gr.Tab("Ask the RAG"):
        question = gr.Textbox(
            label="Your question",
            lines=6,
            placeholder=(
                "Example: I am an incoming first-year CAS student planning to major in "
                "Data Science, with 12 IB credits. Propose an 8-semester plan with ~15–16 "
                "credits per semester."
            ),
        )
        top_k = gr.Slider(
            minimum=1,
            maximum=80,
            step=1,
            value=20,
            label="Top-K courses to retrieve",
        )
        answer = gr.Markdown(label="RAG Answer")
        # rag_app returns bare row lists, so the column labels have to be
        # supplied here; `headers` is the list defined for the first demo.
        sources_df = gr.Dataframe(
            headers=headers,
            label="Retrieved courses used as context",
        )

        ask_btn = gr.Button("Ask RAG")
        ask_btn.click(
            fn=rag_app,  # the function you already wrote
            inputs=[question, top_k],
            outputs=[answer, sources_df],
        )

    with gr.Tab("Upload / Update Catalog"):
        gr.Markdown(
            "Upload a **CSV of courses** to replace the current catalog.\n\n"
            "- File must have the same columns as `courses_detailed.csv` "
            "(`course_code`, `title`, `credits`, `offered`, `prerequisites`, etc.)."
        )

        file_input = gr.File(label="Course catalog CSV", file_types=[".csv"])
        load_btn = gr.Button("Load and rebuild index")

        status_md = gr.Markdown()
        preview_df = gr.Dataframe(label="Preview of new catalog")

        load_btn.click(
            fn=load_new_catalog,
            inputs=file_input,
            outputs=[status_md, preview_df],
        )

demo.launch(share=True, debug=True)


In [None]:
from typing import List, Dict, Set
import numpy as np

def precision_at_k(retrieved: List[str], relevant: Set[str], k: int) -> float:
    """
    Precision@k: relevant documents among the first `k` retrieved, divided by `k`.

    Returns 0.0 when `k` is 0. The denominator is always `k`, even when fewer
    than `k` documents were retrieved.
    """
    if k == 0:
        return 0.0
    relevant_hits = [doc_id for doc_id in retrieved[:k] if doc_id in relevant]
    return len(relevant_hits) / k

def recall_at_k(retrieved: List[str], relevant: Set[str], k: int) -> float:
    """
    Recall@k: fraction of all relevant docs that appear in top-k.

    Each relevant document counts at most once, so a ranking that repeats the
    same id (e.g. one course listed under several programs) cannot push the
    result above 1.0. Returns 0.0 when `relevant` is empty or `k` is not
    positive.
    """
    if not relevant or k <= 0:
        # k <= 0 is guarded explicitly: a negative k would otherwise slice
        # the list from the end instead of taking a prefix.
        return 0.0
    found = set(retrieved[:k]) & set(relevant)
    return len(found) / len(relevant)

def reciprocal_rank(retrieved: List[str], relevant: Set[str]) -> float:
    """
    Reciprocal rank of the first relevant document (the per-query term of MRR).

    Ranks are 1-based. Returns 0.0 when no retrieved document is relevant.
    """
    hit_ranks = (
        rank for rank, doc_id in enumerate(retrieved, start=1) if doc_id in relevant
    )
    first_hit = next(hit_ranks, None)
    return 0.0 if first_hit is None else 1.0 / first_hit


In [None]:
# Ground truth: for each query_id, which course_codes are relevant
qrels: Dict[str, Set[str]] = {
    "q1": {"DS-UA 111", "CSCI-UA 2"},         # "intro DS programming" query
    "q2": {"MATH-UA 121", "MATH-UA 211"},     # "calculus sequence" query
    "q3": {"CSCI-UA 310"},                    # "algorithms" query
    # ...
}


In [None]:
# Example: BM25 retrieval results
bm25_run: Dict[str, List[str]] = {
    "q1": ["PSYCH-UA 10", "DS-UA 111", "CORE-UA 100", "CSCI-UA 2"],
    "q2": ["MATH-UA 121", "MATH-UA 123", "MATH-UA 211"],
    "q3": ["CSCI-UA 101", "CSCI-UA 310", "CSCI-UA 202"],
    # ...
}

# Example: MiniLM + FAISS results
minilm_run: Dict[str, List[str]] = {
    "q1": ["DS-UA 111", "CSCI-UA 2", "CSCI-UA 101"],
    "q2": ["MATH-UA 121", "MATH-UA 211", "DS-UA 912"],
    "q3": ["CSCI-UA 310", "CSCI-UA 101"],
    # ...
}

# Example: OpenAI embeddings results (if you tested them)
openai_run: Dict[str, List[str]] = {
    "q1": ["DS-UA 111", "CSCI-UA 2", "CSCI-UA 101"],
    "q2": ["MATH-UA 211", "MATH-UA 121"],
    "q3": ["CSCI-UA 310", "CSCI-UA 202"],
    # ...
}


In [None]:
def evaluate_run(
    run: Dict[str, List[str]],
    qrels: Dict[str, Set[str]],
    ks=(1, 5, 10)
):
    """
    Average P@k, R@k and MRR of one retrieval run over every query in `qrels`.

    run:   query_id -> ranked list of retrieved doc_ids
    qrels: query_id -> set of relevant doc_ids
    ks:    cut-offs used for P@k and R@k

    A query that is absent from `run`, or whose ranking is empty, contributes
    0.0 to every metric. Returns a dict with keys "P@k" and "R@k" for each k,
    plus "MRR".
    """
    precision_scores = {k: [] for k in ks}
    recall_scores = {k: [] for k in ks}
    reciprocal_ranks = []

    for qid, relevant_docs in qrels.items():
        ranking = run.get(qid, [])
        answered = bool(ranking)

        for k in ks:
            precision_scores[k].append(
                precision_at_k(ranking, relevant_docs, k) if answered else 0.0
            )
            recall_scores[k].append(
                recall_at_k(ranking, relevant_docs, k) if answered else 0.0
            )

        reciprocal_ranks.append(
            reciprocal_rank(ranking, relevant_docs) if answered else 0.0
        )

    # Mean over queries; key order is all P@k, then all R@k, then MRR.
    metrics = {}
    for k, values in precision_scores.items():
        metrics[f"P@{k}"] = float(np.mean(values))
    for k, values in recall_scores.items():
        metrics[f"R@{k}"] = float(np.mean(values))
    metrics["MRR"] = float(np.mean(reciprocal_ranks))

    return metrics


In [None]:
# Score each run against the same qrels. The three runs are the example
# rankings written out by hand above, so these numbers exercise the metric
# code; NOTE(review): confirm whether they are meant to be replaced by real
# system outputs before the results are reported.
bm25_metrics   = evaluate_run(bm25_run, qrels, ks=(1, 5, 10))
minilm_metrics = evaluate_run(minilm_run, qrels, ks=(1, 5, 10))
openai_metrics = evaluate_run(openai_run, qrels, ks=(1, 5, 10))

print("BM25:", bm25_metrics)
print("MiniLM:", minilm_metrics)
print("OpenAI:", openai_metrics)


BM25: {'P@1': 0.3333333333333333, 'P@5': 0.3333333333333333, 'P@10': 0.16666666666666666, 'R@1': 0.16666666666666666, 'R@5': 1.0, 'R@10': 1.0, 'MRR': 0.6666666666666666}
MiniLM: {'P@1': 1.0, 'P@5': 0.3333333333333333, 'P@10': 0.16666666666666666, 'R@1': 0.6666666666666666, 'R@5': 1.0, 'R@10': 1.0, 'MRR': 1.0}
OpenAI: {'P@1': 1.0, 'P@5': 0.3333333333333333, 'P@10': 0.16666666666666666, 'R@1': 0.6666666666666666, 'R@5': 1.0, 'R@10': 1.0, 'MRR': 1.0}


In [None]:
plan_scenarios = [
    {
        "id": "ds_fy_1",
        "program": "CAS Data Science",
        "type": "first_year",
        "description": "Incoming first-year DS major, no AP credit, wants 15–16 credits per term.",
        "prompt": (
            "I am an incoming first-year CAS student planning to major in Data Science. "
            "I have no AP credits. I want about 15–16 credits per semester and I’m okay "
            "taking two STEM courses at once but not three. Propose an 8-semester plan."
        ),
    },
    {
        "id": "ds_transfer_1",
        "program": "CAS Data Science",
        "type": "transfer",
        "description": "Rising junior transferring into DS with Calc I/II done.",
        "prompt": (
            "I am a rising junior transferring into CAS Data Science. I have already "
            "taken the equivalent of Calculus I and II and an intro programming course. "
            "I need a plan for my remaining 4 semesters to finish the DS major while "
            "keeping 14–17 credits per term."
        ),
    },
    {
        "id": "cs_fy_1",
        "program": "CAS Computer Science",
        "type": "first_year",
        "description": "Incoming CS major with AP Calc BC, wants to study abroad junior spring.",
        "prompt": (
            "I am an incoming CAS student who wants to major in Computer Science. I have AP "
            "Calc BC credit and want to study abroad in my junior spring. Please give me an "
            "8-semester plan that balances workload and keeps 14–17 credits per term."
        ),
    },
    # ... add more until you hit ~10–12 total
]


In [None]:
import pandas as pd

def markdown_table_to_df(markdown: str) -> pd.DataFrame:
    """
    Extract the first markdown table from `markdown` and convert it to a DataFrame.
    We then normalize column names to a fixed set:
    Year, Semester, Course code, Course title, Credits, Notes.

    Parsing rules
    -------------
    - The table is the first run of at least two consecutive lines containing "|".
    - The first line of the run is the header. The second line is skipped only
      when it really is a separator row (cells made of "-", ":" and spaces);
      otherwise it is kept as data.
    - Rows whose cells are all empty (e.g. a lone "|" from a truncated reply)
      are dropped.
    - Short rows are padded with "" and extra cells are merged into the last
      column, so every row has exactly as many cells as the header.
    - "Credits", when present, is converted to numbers; unparseable values
      become 0.

    Raises
    ------
    ValueError
        If no markdown table is found.
    """
    lines = markdown.strip().splitlines()

    # Find first contiguous block of lines that look like a markdown table.
    # The trailing "" is a sentinel that flushes a block ending on the last line.
    table_block = []
    current = []
    for line in lines + [""]:
        if "|" in line:
            current.append(line)
            continue
        if len(current) >= 2:  # header + at least one more line
            table_block = current
            break
        current = []

    if not table_block:
        raise ValueError("No markdown table found in model output.")

    # helper to parse a markdown row
    def parse_row(line: str):
        line = line.strip()
        if line.startswith("|"):
            line = line[1:]
        if line.endswith("|"):
            line = line[:-1]
        return [cell.strip() for cell in line.split("|")]

    def is_separator(cells) -> bool:
        return all(c and "-" in c and set(c) <= set("-: ") for c in cells)

    header = parse_row(table_block[0])
    width = len(header)

    rest = table_block[1:]
    if rest and is_separator(parse_row(rest[0])):
        rest = rest[1:]

    data_rows = []
    for raw in rest:
        cells = parse_row(raw)
        if not any(cells):
            continue
        if len(cells) > width:
            # A "|" inside the last column split it; glue the pieces back.
            cells = cells[:width - 1] + [" | ".join(cells[width - 1:])]
        elif len(cells) < width:
            cells = cells + [""] * (width - len(cells))
        data_rows.append(cells)

    df = pd.DataFrame(data_rows, columns=header)

    # -------- normalize column names --------
    rename_map = {}
    for col in df.columns:
        c = col.strip().lower()
        if c == "year":
            rename_map[col] = "Year"
        elif c in ("semester", "term"):
            rename_map[col] = "Semester"
        elif "course code" in c or c == "code":
            rename_map[col] = "Course code"
        elif "course title" in c or c == "title":
            rename_map[col] = "Course title"
        elif "credit" in c:
            rename_map[col] = "Credits"
        elif c in ("notes", "note"):
            rename_map[col] = "Notes"
        # else: leave it as-is

    df = df.rename(columns=rename_map)

    # Try to coerce Credits to numeric if present
    if "Credits" in df.columns:
        df["Credits"] = pd.to_numeric(df["Credits"], errors="coerce").fillna(0)

    return df


In [None]:
def run_scenarios(scenarios, top_k=40):
    """
    Generate one plan per scenario.

    Each scenario is a dict with at least "id" and "prompt". The prompt is
    answered with `top_k` retrieved courses and the returned markdown table is
    parsed into a DataFrame. Returns a dict mapping scenario id -> DataFrame.
    """
    def _plan_for(scenario):
        print("Running scenario:", scenario["id"])
        plan_markdown = answer_query(scenario["prompt"], k=top_k)
        return markdown_table_to_df(plan_markdown)

    return {scenario["id"]: _plan_for(scenario) for scenario in scenarios}

# One LLM call per scenario; the result maps scenario id -> parsed plan DataFrame.
scenario_plans = run_scenarios(plan_scenarios, top_k=40)


Running scenario: ds_fy_1
Running scenario: ds_transfer_1
Running scenario: cs_fy_1


In [None]:
# NOTE(review): `evaluate_all_scenarios` and `program_specs` are not defined in
# any cell visible in this notebook. Confirm they are defined elsewhere (or
# restore the missing cell); otherwise this line raises NameError on a fresh
# kernel.
scores_df = evaluate_all_scenarios(plan_scenarios, scenario_plans, program_specs)
print(scores_df)


     scenario_id               program  req_score  credit_score  prereq_score
0        ds_fy_1      CAS Data Science          1             1             3
1  ds_transfer_1      CAS Data Science          1             3             3
2        cs_fy_1  CAS Computer Science          3             1             3
