In [8]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
import os
from groq import Groq

In [6]:
class RAG:
    def __init__(self, collection_name="aus_food_nutrition"):
        self.model = SentenceTransformer('all-mpnet-base-v2')
        self.chroma_client = chromadb.Client()
        self.collection = self.chroma_client.get_or_create_collection(collection_name)
        self.doc_ids = set()  # Track existing IDs for updates

    def add_or_update_documents(self, docs):
        """
        docs: list of (doc_id, doc_text) tuples.
        If doc_id exists, it will update the doc; otherwise, it adds it.
        """
        ids, texts = zip(*docs)
        embeddings = self.model.encode(texts)
        for i, doc_id in enumerate(ids):
            if doc_id in self.doc_ids:
                # Delete before updating (Chroma requires this for updating)
                self.collection.delete(ids=[doc_id])
            self.collection.add(
                documents=[texts[i]], 
                embeddings=[embeddings[i]], 
                ids=[doc_id]
            )
            self.doc_ids.add(doc_id)

    def retrieve(self, prompt, n_results=2):
        """
        Returns the most relevant document texts for the given prompt.
        """
        query_embedding = self.model.encode([prompt])[0]
        results = self.collection.query(
            query_embeddings=[query_embedding], 
            n_results=n_results
        )
        return results["documents"][0] if "documents" in results else []


In [12]:
rag = RAG()
rag.add_or_update_documents([
    ("1", "Vegemite is a popular Australian spread made from brewers' yeast extract."),
    ("2", "Kangaroo meat is a lean source of protein, low in fat."),
    ("3", "The Australian Dietary Guidelines recommend eating a variety of foods from the five food groups.")
])

# Retrieve by prompt
analyzed_health_condition = """
{
    "obesity_prediction": {
        "obesity_level": "Overweight_Level_II"
        "confidence": 10%
    },
    "diabetes_prediction": {
        "diabetes": true,
        "confidence": 90%
    }
"""

weekly_plan_format = """{
"suggestion": STRING
"weekly_plan": [
    {
        "week": 1,
        "target_calories_per_day": INT,
        "focus": STRING,
        "workouts": [ARRAY OF STRINGS],
        "meal_notes": STRING,
        "reminders": [ARRAY OF STRINGS]
    },
    ... (repeat for as many weeks as appropriate)
}]"""
prompt = f"""
You are a nutrition and fitness assistant.
Below is an analyzed health condition for a user, expressed in JSON: {analyzed_health_condition}
Your task: Based on the analyzed health condition and using the retrieved knowledge, generate a weekly plan strictly in this JSON format (replace INT and STRING placeholders): {weekly_plan_format} 
"""
relevant_texts = rag.retrieve(prompt, n_results=2)
prompt = prompt + f"\nBelow is relevant knowledge about Australian foods and dietary guidelines: {relevant_texts}"
print("Retrieved texts:", relevant_texts)

Retrieved texts: ['The Australian Dietary Guidelines recommend eating a variety of foods from the five food groups.', 'Kangaroo meat is a lean source of protein, low in fat.']


In [13]:
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": prompt,
        }
    ],
    model="llama-3.3-70b-versatile",
)

print(chat_completion.choices[0].message.content)

Based on the analyzed health condition, I've generated a weekly plan to help the user manage their overweight level and diabetes. Here's the plan in the required JSON format:

```
{
    "suggestion": "Consult a doctor or a registered dietitian before starting any new diet or exercise plan, especially with a diabetes diagnosis."
    "weekly_plan": [
        {
            "week": 1,
            "target_calories_per_day": 1800,
            "focus": "Portion control and balanced meals",
            "workouts": ["Brisk walking", "Light swimming", "Bodyweight exercises"],
            "meal_notes": "Eat a variety of foods from the five food groups, including lean proteins like kangaroo meat, and whole grains, fruits, and vegetables.",
            "reminders": ["Drink at least 8 glasses of water per day", "Monitor blood sugar levels regularly", "Take medication as prescribed"]
        },
        {
            "week": 2,
            "target_calories_per_day": 1700,
            "focus": "Increas

# GPQA Evaluation

In [None]:
from datasets import load_dataset
import random
from huggingface_hub import login
login()

In [16]:
dataset = load_dataset("Idavidrein/gpqa", "gpqa_diamond")["train"]
print(dataset.head)  # This will show the first 2 examples

AttributeError: 'Dataset' object has no attribute 'head'

In [22]:
import os
import random
from groq import Groq
from datasets import load_dataset

def evaluate_gpqa(question, options, model="llama-3.3-70b-versatile"):
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    opts_str = "\n".join([f"{chr(65+i)}. {opt}" for i, opt in enumerate(options)])
    prompt = f"You are answering a multiple-choice question. Choose the correct answer from the options below:\n\nQuestion: {question}\nOptions:\n{opts_str}\nAnswer (just the letter):"
    
    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model
        )
        answer = response.choices[0].message.content.strip()
        print(f"Predicted Answer: {answer}")
        return answer
    except Exception as e:
        print(f"Error: {e}")
        return None

def run_eval(num_questions):
    # Load the dataset
    dataset = load_dataset("Idavidrein/gpqa", "gpqa_diamond")["train"]
    
    # Debug: Print the first example to see the structure
    print("Dataset structure:")
    print(f"Type of dataset[0]: {type(dataset[0])}")
    print(f"Keys in dataset[0]: {list(dataset[0].keys()) if isinstance(dataset[0], dict) else 'Not a dict'}")
    print(f"Dataset length: {len(dataset)}")
    print("-" * 50)
    
    correct_count = 0
    
    # Use range to index directly instead of slicing
    for i in range(min(num_questions, len(dataset))):
        example = dataset[i]
        print(f"\nQuestion {i+1}/{num_questions}:")
        
        # Check if example is a dictionary
        if not isinstance(example, dict):
            print(f"Error: Expected dict but got {type(example)}")
            print(f"Content: {example}")
            continue
            
        # Based on the dataset structure, use the revised versions (not pre-revision)
        question = example.get('Question')  # Use the revised question
        correct_answer = example.get('Correct Answer')  # Use the revised correct answer
        
        # Get incorrect answers
        incorrect_answers = [
            example.get('Incorrect Answer 1'),
            example.get('Incorrect Answer 2'), 
            example.get('Incorrect Answer 3')
        ]
        
        # Validate we have all required data
        if not question:
            print(f"Could not find question")
            continue
            
        if not correct_answer:
            print(f"Could not find correct answer")
            continue
            
        # Filter out None values and clean up answers
        incorrect_answers = [ans.strip() if ans else None for ans in incorrect_answers]
        incorrect_answers = [ans for ans in incorrect_answers if ans]
        
        if len(incorrect_answers) < 3:
            print(f"Could not find 3 incorrect answers. Found {len(incorrect_answers)}")
            continue
            
        # Create options list and shuffle
        options = incorrect_answers + [correct_answer]
        random.shuffle(options)
        
        # Find the index of the correct answer after shuffling
        correct_index = options.index(correct_answer)
        correct_letter = chr(65 + correct_index)  # Convert to A, B, C, D
        
        print(f"Question: {question}")
        print("Options:")
        for idx, opt in enumerate(options):
            print(f"  {chr(65+idx)}. {opt}")
        
        # Get prediction
        pred = evaluate_gpqa(question, options)
        
        if pred and pred.upper() == correct_letter:
            print(f"✓ Correct! Predicted: {pred}, Answer: {correct_letter}")
            correct_count += 1
        else:
            print(f"✗ Wrong. Predicted: {pred}, Correct: {correct_letter}")
            print(f"  Correct answer: {correct_answer}")
    
    print(f"\nFinal Score: {correct_count}/{num_questions} ({correct_count/num_questions*100:.1f}%)")

# Example usage with debugging
if __name__ == "__main__":
    run_eval(3)  # Start with a small number for debugging

Dataset structure:
Type of dataset[0]: <class 'dict'>
Keys in dataset[0]: ['Pre-Revision Question', 'Pre-Revision Correct Answer', 'Pre-Revision Incorrect Answer 1', 'Pre-Revision Incorrect Answer 2', 'Pre-Revision Incorrect Answer 3', 'Pre-Revision Explanation', 'Self-reported question-writing time (minutes)', 'Question', 'Correct Answer', 'Incorrect Answer 1', 'Incorrect Answer 2', 'Incorrect Answer 3', 'Explanation', 'Revision Comments (from Question Writer)', 'Subdomain', "Writer's Difficulty Estimate", 'Extra Revised Question', 'Extra Revised Explanation', 'Extra Revised Correct Answer', 'Extra Revised Incorrect Answer 1', 'Extra Revised Incorrect Answer 2', 'Extra Revised Incorrect Answer 3', 'Non-Expert Validator Accuracy', 'Majority Non-Expert Vals Incorrect', 'Expert Validator Accuracy', 'Record ID', 'High-level domain', 'Question Writer', 'Feedback_EV_1', 'Validator Revision Suggestion_EV_1', 'Is First Validation_EV_1', 'Post hoc agreement_EV_1', 'Sufficient Expertise?_EV_1',

In [18]:
run_eval(10)

TypeError: string indices must be integers