In [2]:
import sys
# Show the path of the Python interpreter running this kernel, to confirm
# which environment the notebook is using.
print(sys.executable)

/workspaces/Kaggle-Solution-Finder-Agent/app/.venv/bin/python


In [3]:
import os
import sys  # imported here too so this cell does not depend on another cell having run
from pathlib import Path

# 1) Redirect logs to eval_logs/
# Set before `logs` is imported below — presumably the module reads this
# variable at import time; TODO confirm against app/logs.py.
os.environ["LOGS_DIRECTORY"] = "eval_logs"

# 2) Make app/ importable
# Assumes the notebook's working directory sits directly under the repo root.
REPO_ROOT = Path.cwd().parent
APP_DIR = str(REPO_ROOT / "app")
if APP_DIR not in sys.path:
    # Guard keeps re-runs of this cell from stacking duplicate sys.path entries.
    sys.path.insert(0, APP_DIR)

from ingest import (
    read_repo_data,
    extract_completed_competitions,
    build_vector_index,
)
from search_agent import create_search_agent
from logs import EvaluationChecklist, LOG_DIR, log_interaction_to_file
from pydantic_ai import Agent

  from .autonotebook import tqdm as notebook_tqdm


In [29]:
# Display the absolute path of the log directory exported by `logs`.
# NOTE(review): the recorded output ends in eval/logs, while the setup cell sets
# LOGS_DIRECTORY to "eval_logs" — confirm the override is actually being picked up.
LOG_DIR.absolute()

PosixPath('/workspaces/Kaggle-Solution-Finder-Agent/eval/logs')

In [5]:
# Read the source data from the Khangtran94/kaggle-solutions repo (gh-pages branch).
project_docs = read_repo_data(
    repo_owner="Khangtran94",
    repo_name="kaggle-solutions",
    branch="gh-pages",
)

# Reduce the raw documents to competition records
# (presumably only completed competitions; verify in `ingest`).
completed_competitions = extract_completed_competitions(project_docs)

# Build the embedding model and vector index used by the search agent later on.
# The recorded run of this cell shows a batch progress bar of roughly 45 seconds.
embedding_model, vindex = build_vector_index(completed_competitions)

Batches: 100%|██████████| 18/18 [00:44<00:00,  2.49s/it]


In [71]:
# System instructions for the question-generating agent. Leading and trailing
# whitespace is removed with .strip() before use.
# NOTE(review): the text contains a typo ("solutiuons") and uneven punctuation in the
# numbered list; left untouched here because it is runtime text sent to the model.
question_generation_prompt = """
## Prompt: Question Generator AI

**Input:** A list of Kaggle ML competition projects, including the title, evaluation metric, URL, and number of solutions.

**Your task:** For each Kaggle competition, generate a set of **at least 4 questions**. The questions should cover different aspects of the competition and should vary in complexity, from basic to advanced. These questions should be specific to the competition's task, dataset, evaluation metric, and any best practices.

### Ensure the following:
- **Generate at least 4 questions** for each competition, covering the following aspects:
    1. **Evaluation Metric:** What does the metric measure? How can it be optimized in this competition?
    2. **Domain:** What is the domain of the project 
    3. **Title** Title of project, related to any domain ?
    4. **Number of Solutions** Count number of solutions

- **Avoid generic questions** like "What is the competition about?" 
- Questions should be specific to the **core tasks** of the competition, especially the metric and domain, title, number of solutiuons
- The questions should be varied in style, complexity, and focus, so they help the user cover all aspects of the competition.
""".strip()

from pydantic import BaseModel
from pydantic_ai import Agent

class QuestionsList(BaseModel):
    """Structured output schema for the question generator: a list of question strings."""
    # Generated questions, one string per question.
    questions: list[str]

# Agent that follows `question_generation_prompt` and returns its answer
# as a `QuestionsList` instance.
question_generate_agent = Agent(
    model="gpt-4o-mini",
    name="question_generate_agent",
    instructions=question_generation_prompt,
    output_type=QuestionsList,
)

import random
import json
# Sample up to 10 random completed competitions from the list.
# A dedicated seeded generator makes the draw reproducible across kernel restarts
# without touching the global `random` state; capping the size avoids the
# ValueError that random.sample raises when fewer than 10 competitions exist.
SAMPLE_SIZE = 10
SAMPLE_SEED = 42
sample = random.Random(SAMPLE_SEED).sample(
    completed_competitions,
    min(SAMPLE_SIZE, len(completed_competitions)),
)

def _format_competition_prompt(competition):
    """Render one competition record as a prompt block.

    The block lists title, metric, link and number of solutions, followed by the
    fixed question instructions. Missing fields fall back to 'N/A'; a list-valued
    'solutions' entry is reported by its length, anything else is printed as-is.
    """
    solutions = competition.get('solutions')
    if isinstance(solutions, list):
        solution_count = len(solutions)
    else:
        solution_count = competition.get('solutions', 'N/A')

    return (
        f"Title: {competition.get('title', 'N/A')}\n"
        f"Metric: {competition.get('metric', 'N/A')}\n"
        f"Link: {competition.get('link', 'N/A')}\n"
        f"Number of Solutions: {solution_count}\n"
        "Generate at least 4 relevant questions for the following competition:\n"
        "- One question about the **evaluation metric**.\n"
        "- One question about the **title**.\n"
        "- One question about **domain** used.\n"
        "- One question about the **number of solutions** used in this competition.\n"
    )


# One prompt block per sampled competition, in the same order as `sample`
prompt_docs = [_format_competition_prompt(d) for d in sample]

# Combine all blocks into one string, with blank lines between competitions
prompt = "\n\n".join(prompt_docs)

# Preview the combined prompt text
print(prompt)

Title: ARC Prize 2024
Metric: Abstraction and Reasoning Challenge
Link: https://www.kaggle.com/c/arc-prize-2024
Number of Solutions: 7
Generate at least 4 relevant questions for the following competition:
- One question about the **evaluation metric**.
- One question about the **title**.
- One question about **domain** used.
- One question about the **number of solutions** used in this competition.


Title: Mayo Clinic - STRIP AI
Metric: Weighted Multiclass Loss
Link: https://www.kaggle.com/c/mayo-clinic-strip-ai
Number of Solutions: 10
Generate at least 4 relevant questions for the following competition:
- One question about the **evaluation metric**.
- One question about the **title**.
- One question about **domain** used.
- One question about the **number of solutions** used in this competition.


Title: Playground Series - Season 3, Episode 7
Metric: Area Under Receiver Operating Characteristic Curve
Link: https://www.kaggle.com/c/playground-series-s3e7
Number of Solutions: 6
Gener

In [72]:
# Display the list of per-competition prompt strings (one entry per sampled competition).
prompt_docs

['Title: ARC Prize 2024\nMetric: Abstraction and Reasoning Challenge\nLink: https://www.kaggle.com/c/arc-prize-2024\nNumber of Solutions: 7\nGenerate at least 4 relevant questions for the following competition:\n- One question about the **evaluation metric**.\n- One question about the **title**.\n- One question about **domain** used.\n- One question about the **number of solutions** used in this competition.\n',
 'Title: Mayo Clinic - STRIP AI\nMetric: Weighted Multiclass Loss\nLink: https://www.kaggle.com/c/mayo-clinic-strip-ai\nNumber of Solutions: 10\nGenerate at least 4 relevant questions for the following competition:\n- One question about the **evaluation metric**.\n- One question about the **title**.\n- One question about **domain** used.\n- One question about the **number of solutions** used in this competition.\n',
 'Title: Playground Series - Season 3, Episode 7\nMetric: Area Under Receiver Operating Characteristic Curve\nLink: https://www.kaggle.com/c/playground-series-s3e7\

In [73]:
# Generate questions for each competition individually
questions = []

# Loop through each competition in the sample
for competition in sample:
    # Create the individual prompt for the competition
    prompt = (
        f"Title: {competition.get('title', 'N/A')}\n"
        f"Metric: {competition.get('metric', 'N/A')}\n"
        f"Link: {competition.get('link', 'N/A')}\n"
        f"Number of Solutions: {len(competition['solutions']) if isinstance(competition.get('solutions'), list) else competition.get('solutions', 'N/A')}\n"
        "Generate at least 4 relevant questions for the following competition:\n"
        "- One question about the **evaluation metric**.\n"
        "- One question about the **title**.\n"
        "- One question about **domain** used.\n"
        "- One question about the **number of solutions** used in this competition.\n"
    )
    
    # Generate the questions for this competition
    question_generated = await question_generate_agent.run(prompt)
    
    # Append the questions to the final list
    questions.extend(question_generated.output.questions)

# Print the total number of questions
print(len(questions))  # Should print 40 if you have 10 competitions (4 questions each)

# Print the questions to check them
print(questions)

40
['What does the Abstraction and Reasoning Challenge evaluation metric measure in the ARC Prize 2024 competition and what techniques could participants use to optimize it?', "How does the title 'ARC Prize 2024' reflect the underlying tasks and goals of this competition?", 'What are the key domains and topics covered in the ARC Prize 2024 competition, and how do they relate to artificial intelligence and reasoning?', 'With only 7 solutions submitted in the ARC Prize 2024 competition, what competitive strategies might impact the performance and diversity of approaches among participants?', 'What does the Weighted Multiclass Loss metric measure, and how can participants optimize their models to achieve a lower value for this metric in the Mayo Clinic - STRIP AI competition?', "How does the title 'Mayo Clinic - STRIP AI' reflect the focus of the project, and what implications does it have for the types of solutions being submitted?", 'What medical domain challenges are being addressed in

In [74]:
# Display the accumulated list of generated question strings.
questions

['What does the Abstraction and Reasoning Challenge evaluation metric measure in the ARC Prize 2024 competition and what techniques could participants use to optimize it?',
 "How does the title 'ARC Prize 2024' reflect the underlying tasks and goals of this competition?",
 'What are the key domains and topics covered in the ARC Prize 2024 competition, and how do they relate to artificial intelligence and reasoning?',
 'With only 7 solutions submitted in the ARC Prize 2024 competition, what competitive strategies might impact the performance and diversity of approaches among participants?',
 'What does the Weighted Multiclass Loss metric measure, and how can participants optimize their models to achieve a lower value for this metric in the Mayo Clinic - STRIP AI competition?',
 "How does the title 'Mayo Clinic - STRIP AI' reflect the focus of the project, and what implications does it have for the types of solutions being submitted?",
 'What medical domain challenges are being addressed

In [75]:
query_agent = create_search_agent(embedding_model, vindex) 
query_agent 
for q in questions: 
    print(q) 
    answer_question = await query_agent.run(user_prompt = q) 
    log_interaction_to_file(query_agent, answer_question.new_messages())

What does the Abstraction and Reasoning Challenge evaluation metric measure in the ARC Prize 2024 competition and what techniques could participants use to optimize it?
How does the title 'ARC Prize 2024' reflect the underlying tasks and goals of this competition?
What are the key domains and topics covered in the ARC Prize 2024 competition, and how do they relate to artificial intelligence and reasoning?
With only 7 solutions submitted in the ARC Prize 2024 competition, what competitive strategies might impact the performance and diversity of approaches among participants?
What does the Weighted Multiclass Loss metric measure, and how can participants optimize their models to achieve a lower value for this metric in the Mayo Clinic - STRIP AI competition?
How does the title 'Mayo Clinic - STRIP AI' reflect the focus of the project, and what implications does it have for the types of solutions being submitted?
What medical domain challenges are being addressed in the Mayo Clinic - STRI