# Prototype Eval (test cases)

Evaluating the prototype on at least 10 examples to check:
- Topic Correctness
- Citation Correctness
- Format Correctness

In [8]:
# Libraries/ Imports
import sys
import os 
from pathlib import Path
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI


# Add project root to Python path
PROJECT_ROOT = Path("..").resolve()
sys.path.append(str(PROJECT_ROOT))

# From utils.py pull helper functions 
from src.utils import( 
    load_chunks,
    load_index,
    search_labs,
    search_slides,
    FAISS_SLIDES_PATH,
    FAISS_LABS_PATH,
    make_context,
    generate_7_day_plan,
  
)

load_dotenv(override=True) 
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


In [10]:
# Load Data and FIASS
slides_df, labs_df = load_chunks()

slides_index = load_index(FAISS_SLIDES_PATH)
labs_index = load_index(FAISS_LABS_PATH)

print("Slides rows:", len(slides_df), "|FAISS size:", slides_index.ntotal)
print("Labs rows:  ", len(labs_df), "|FAISS size:", labs_index.ntotal)


Slides rows: 40 |FAISS size: 40
Labs rows:   1410 |FAISS size: 1410


In [11]:
# Define feedback sample (easy, medium, hard and noisy)
# AI was able to assist us with generating 10 diverse feedback examples quickly.
# To ensure that the examples fit the categories of easy, medium, hard and noisy we will have to create new ones and/ edit the ones provided for us.
# We will use these for now just to test the functionality of the code. 

test_feedback = [
    "I lost points on SQL joins and I keep mixing up inner vs left vs right joins.",
    "I struggled with the concepts of overfitting and regularization in our machine learning assignments.",
    "The sections on neural networks and backpropagation were really hard for me to grasp.",
    "I found the data visualization part confusing, especially when to use different types of charts.",
    "I had trouble understanding the differences between supervised and unsupervised learning.",
    "The statistical concepts like p-values and confidence intervals were challenging to apply in practice.",
    "I got lost in the details of Python programming, particularly with functions and loops.",
    "The project on natural language processing was overwhelming, especially tokenization and stemming.",
    "I found it difficult to follow the steps in data cleaning and preprocessing for our datasets.",
    "The explanations about clustering algorithms like K-means and hierarchical clustering were unclear."
]

# Loop through the test feedbacks and generate plans
for i, feedback in enumerate(test_feedback):
    plan = generate_7_day_plan(test_feedback, slides_index, slides_df, labs_index, labs_df, top_k_slides=4, top_k_labs=6, model_name="gpt-4o-mini")
    
    print(plan)

### 7-Day Micro-Task Plan

#### Day 1 — Review SQL Joins
- **Task 1 (est. 15–25 min)** — Read through the SQL JOINs section in the lab file to understand the differences between INNER JOIN, LEFT JOIN, and RIGHT JOIN. Take notes on key points. [CITATION: 1]
- **Task 2 (est. 10–20 min)** — Review examples of SQL JOINs provided in the lab file and summarize how each join type works with a brief example. [CITATION: 4]

#### Day 2 — Apply SQL Joins
- **Task 1 (est. 20–30 min)** — Complete coding exercises in the lab file where you implement INNER JOIN and LEFT JOIN on sample datasets. [CITATION: 3]
- **Task 2 (est. 15–25 min)** — Modify the provided SQL queries to create RIGHT JOIN and FULL OUTER JOIN examples, then run them to see the results. [CITATION: 6]

#### Day 3 — Review Machine Learning Concepts
- **Task 1 (est. 20–30 min)** — Read about overfitting and regularization in machine learning. Take notes on definitions and examples of each concept. [CITATION: Context insufficient for sp