# AIMO3 Solver - Kaggle Submission Notebook
## AI Mathematical Olympiad Progress Prize 3

This notebook solves Olympiad-level math problems using:
- Open-source LLM for chain-of-thought reasoning (selected via `MODEL_NAME`; GPT-2 is the demo fallback and should be replaced with a stronger model for real runs)
- SymPy for symbolic computation and verification
- Modular pipeline for preprocessing, reasoning, and postprocessing

## 1. Install Dependencies

In [1]:
# Install required packages
!pip install --quiet sympy torch transformers accelerate peft datasets tqdm pandas numpy scipy PyPDF2

## 2. Import Libraries and Configure

In [2]:
import json
import os
import re
from datetime import datetime
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
import sympy as sp
import torch
from tqdm import tqdm

# HuggingFace imports
from transformers import AutoTokenizer, AutoModelForCausalLM

# Report the runtime environment so the outputs below can be interpreted.
cuda_ready = torch.cuda.is_available()

print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {cuda_ready}")
if cuda_ready:
    # Only query the device name when a GPU is actually present.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

  from .autonotebook import tqdm as notebook_tqdm


PyTorch Version: 2.10.0+cu128
CUDA Available: False


## 3. Setup Directories and Logging

In [3]:
# Working directories used by the rest of the notebook
OUTPUT_DIR = "outputs"
LOG_DIR = "logs"
DATA_DIR = "datasets"

# Create each directory if it is missing; existing directories are left untouched.
for directory in (OUTPUT_DIR, LOG_DIR, DATA_DIR):
    os.makedirs(directory, exist_ok=True)

print(f"Output directory: {OUTPUT_DIR}")
print(f"Log directory: {LOG_DIR}")
print(f"Data directory: {DATA_DIR}")

Output directory: outputs
Log directory: logs
Data directory: datasets


## 4. Load LLM Model

In [None]:
# Configuration
MODEL_NAME = "gpt2"  # Using GPT-2 as fallback for demo (replace with stronger model like Mistral-7B for production)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_TOKENS = 512
TEMPERATURE = 0.7

print(f"Loading model: {MODEL_NAME}")
print(f"Using device: {DEVICE}")

# Bind both names before loading so later cells can always reference them,
# even when loading fails or this cell is re-run after a failed attempt.
tokenizer = None
model = None

try:
    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
    )
    # Move the weights to the selected device; without this the model stays on
    # the CPU even when CUDA is available.
    model.to(DEVICE)
    # Inference only: switch off dropout and other training-time behaviour.
    model.eval()
    print("✅ Model loaded successfully!")
except Exception as e:
    # Deliberate best-effort: keep the notebook runnable without a model.
    # Reset both names so a half-initialised pair is never used downstream.
    tokenizer = None
    model = None
    print(f"⚠️ Model loading failed: {e}")
    print("Using preprocessing-only mode for submission generation")

Loading model: Open-Orca/orca_mini_3b
Using device: cpu


OSError: Open-Orca/orca_mini_3b is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `hf auth login` or by passing `token=<your_token>`

## 5. Define Preprocessing Functions

In [None]:
# Plain-text replacements for LaTeX commands whose removal would change the
# meaning of a problem statement (e.g. dropping `\times` from "3 \times 5").
_LATEX_SYMBOLS = {
    "times": "*",
    "cdot": "*",
    "div": "/",
    "le": "<=",
    "leq": "<=",
    "ge": ">=",
    "geq": ">=",
    "ne": "!=",
    "neq": "!=",
    "pm": "+/-",
}


def _latex_command_to_text(match) -> str:
    r"""Return the space-padded plain-text form of one matched `\name` command."""
    name = match.group(1)
    # Unknown commands keep their name (`\alpha` -> `alpha`) instead of vanishing.
    return f" {_LATEX_SYMBOLS.get(name, name)} "


def latex_to_text(latex_expr: str) -> str:
    r"""
    Convert LaTeX expressions into plain text suitable for LLM input.

    The patterns match single-backslash LaTeX commands (the usual form in
    problem statements), and the conversion keeps the mathematical meaning:

    - ``$`` / ``$$`` delimiters, ``\left`` / ``\right`` and
      ``\begin{...}`` / ``\end{...}`` markers are dropped.
    - ``\frac{a}{b}`` becomes ``(a)/(b)``, ``\sqrt{x}`` becomes ``sqrt(x)``,
      ``^{...}`` / ``_{...}`` become ``^(...)`` / ``_(...)`` and
      ``\text{...}`` keeps only its content.
    - Common operators (``\times``, ``\leq``, ...) become ASCII symbols; any
      other command keeps its name without the backslash.
    - Whitespace is collapsed to single spaces.

    Args:
        latex_expr: Problem text that may contain LaTeX markup.

    Returns:
        The plain-text rendering, stripped of leading/trailing whitespace.
    """
    text = re.sub(r"\\\\", " ", latex_expr)  # LaTeX line break `\\`
    text = re.sub(r"(?<!\\)\$", "", text)  # Math delimiters (keeps escaped `\$`)
    # The lookahead keeps `\leftarrow` / `\rightarrow` intact.
    text = re.sub(r"\\(?:left|right)(?![A-Za-z])", "", text)
    text = re.sub(r"\\(?:begin|end)\{.*?\}", " ", text)

    # Rewrite brace constructs innermost-first until nothing changes, so that
    # nested forms such as `\frac{\sqrt{2}}{3}` are handled.
    previous = None
    while previous != text:
        previous = text
        text = re.sub(r"\\(?:text|mathrm|mathbf)\{([^{}]*)\}", r"\1", text)
        text = re.sub(r"\\[dt]?frac\{([^{}]*)\}\{([^{}]*)\}", r"(\1)/(\2)", text)
        text = re.sub(r"\\sqrt\{([^{}]*)\}", r"sqrt(\1)", text)
        text = re.sub(r"([\^_])\{([^{}]*)\}", r"\1(\2)", text)

    text = re.sub(r"\\[,!]", "", text)  # Thin / negative spaces
    text = re.sub(r"\\[;: ]", " ", text)  # Wider explicit spaces
    text = re.sub(r"\\([A-Za-z]+)", _latex_command_to_text, text)
    text = re.sub(r"(?<!\\)[{}]", "", text)  # Leftover grouping braces
    text = re.sub(r"\\([{}%&#_$])", r"\1", text)  # Escaped literal characters
    text = re.sub(r"\s+", " ", text)  # Normalize whitespace
    return text.strip()


def prepare_problem(input_data: str, input_type: str = "text") -> str:
    """
    Normalise a raw problem statement into plain text.

    When ``input_type`` is ``"latex"`` the input is passed through
    ``latex_to_text``; for every other input type the input is returned with
    its surrounding whitespace stripped.
    """
    needs_latex_conversion = input_type == "latex"
    return latex_to_text(input_data) if needs_latex_conversion else input_data.strip()


print("Preprocessing functions defined.")

## 6. Define LLM Reasoning Function

In [None]:
def llm_solve(problem_text: str, max_tokens: int = MAX_TOKENS, temperature: float = TEMPERATURE) -> Dict[str, Any]:
    """
    Solve problem using LLM with chain-of-thought reasoning.

    Args:
        problem_text: Plain-text problem statement inserted into the prompt.
        max_tokens: Maximum number of newly generated tokens.
        temperature: Sampling temperature. A value of 0 (or ``None``) switches
            to greedy decoding, because sampling requires a positive temperature.

    Returns:
        A dict with the keys ``problem``, ``prompt``, ``reasoning`` and
        ``raw_output``. ``reasoning`` holds only the text generated by the
        model; ``raw_output`` holds the decoded prompt plus generation. When
        anything fails, ``reasoning`` and ``raw_output`` are ``None`` and an
        ``error`` key carries the exception message.
    """
    prompt = f"""You are a mathematical expert solving Olympiad-level problems.
Solve the following problem step-by-step:

Problem: {problem_text}

Solution:
Let me work through this carefully.

Step 1: """

    try:
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        generation_kwargs = {
            "max_new_tokens": max_tokens,
            "pad_token_id": tokenizer.eos_token_id,
        }
        if temperature and temperature > 0:
            generation_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
        else:
            # Greedy decoding: sampling parameters must not be passed here.
            generation_kwargs["do_sample"] = False

        with torch.no_grad():
            outputs = model.generate(**inputs, **generation_kwargs)

        raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Decode only the newly generated tokens. The prompt itself contains
        # digits ("Step 1:"), which would otherwise leak into the numeric
        # answer extraction that runs on `reasoning`.
        reasoning = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        return {
            "problem": problem_text,
            "prompt": prompt,
            "reasoning": reasoning.strip(),
            "raw_output": raw_output
        }
    except Exception as e:
        # Best-effort: report the failure to the caller instead of aborting the
        # whole run; the keys mirror the success case.
        return {
            "problem": problem_text,
            "prompt": prompt,
            "error": str(e),
            "reasoning": None,
            "raw_output": None
        }


print("LLM solving function defined.")

## 7. Define Computation Functions

In [None]:
def extract_numeric_answer(text: str) -> Optional[int]:
    r"""
    Extract numeric answer from LLM output.

    Patterns are tried in priority order: ``\boxed{N}``, then
    "answer/result/final answer: N", then "the answer is N" / "equals N",
    then "N is the answer" / "N is correct". Within a pattern the LAST match
    wins, because chain-of-thought text states its final answer at the end.
    If no pattern matches, the last number in the text is returned.

    Args:
        text: Model output to scan; may be ``None`` or empty.

    Returns:
        The extracted non-negative integer, or ``None`` when ``text`` is
        ``None``/empty or contains no digits.
    """
    if not text:
        return None

    # Look for patterns indicating final answer
    patterns = [
        r"\\boxed\{\s*(\d+)\s*\}",
        r"(?:answer|result|final answer)\s*:?\s*(\d+)",
        r"(?:the answer is|equals)\s*(\d+)",
        r"(\d{1,5})\s*(?:is the answer|is correct)"
    ]

    for pattern in patterns:
        # Each pattern has exactly one group, so findall yields digit strings.
        matches = re.findall(pattern, text, re.IGNORECASE)
        if matches:
            return int(matches[-1])

    # Fallback: find all numbers and return the last one
    numbers = re.findall(r"\d+", text)
    return int(numbers[-1]) if numbers else None


def validate_answer(answer: int) -> int:
    """
    Clamp an answer into the valid AIMO range (0-99999).

    ``None`` maps to 0; any other value is converted with ``int`` and then
    limited to the closed interval [0, 99999].
    """
    lower_bound, upper_bound = 0, 99999
    if answer is None:
        return lower_bound

    value = int(answer)
    if value < lower_bound:
        return lower_bound
    if value > upper_bound:
        return upper_bound
    return value


print("Computation functions defined.")

## 8. Load Test Data (if available)

In [None]:
# Location of the public test set inside the data directory
DATA_PATH = os.path.join(DATA_DIR, "aimo3_public.csv")

if not os.path.exists(DATA_PATH):
    # No dataset on disk: fall back to three small demonstration problems.
    print(f"No test data found at {DATA_PATH}")
    print("Creating example problems for demonstration...")
    df = pd.DataFrame({
        "problem_id": ["P1", "P2", "P3"],
        "latex_problem": [
            r"Compute $2 + 3 \times 5$.",
            r"Solve $2x + 5 = 13$. What is $x$?",
            r"Find $7 \times 8$."
        ]
    })
    print(f"Created {len(df)} example problems")
else:
    # Dataset found: load it and show its shape, columns and first row.
    df = pd.read_csv(DATA_PATH)
    print(f"Loaded {len(df)} problems from {DATA_PATH}")
    print(f"Columns: {df.columns.tolist()}")
    print(f"\nFirst problem:")
    print(df.iloc[0])

## 9. Preprocess Problems

In [None]:
# Pick the problem-text column: prefer "latex_problem", then "problem",
# otherwise fall back to the second column of the frame.
if "latex_problem" in df.columns:
    input_col = "latex_problem"
elif "problem" in df.columns:
    input_col = "problem"
else:
    input_col = df.columns[1]

# Pick the ID column: prefer "problem_id", otherwise use the first column.
if "problem_id" in df.columns:
    id_col = "problem_id"
else:
    id_col = df.columns[0]

print(f"Using '{id_col}' as problem ID column")
print(f"Using '{input_col}' as problem text column")

# Every problem is treated as LaTeX input and converted to plain text.
df["problem_text"] = df[input_col].apply(
    lambda raw_problem: prepare_problem(str(raw_problem), input_type="latex")
)

print(f"\nPreprocessed {len(df)} problems")
print(f"\nFirst preprocessed problem:")
print(df["problem_text"].iloc[0])

## 10. Solve Problems

In [None]:
# Solve all problems.
# `predictions` always holds an integer per problem (0 when solving failed),
# `reasoning_steps` holds the reasoning text or an error note per problem, and
# `errors` records which problems failed.
predictions = []
reasoning_steps = []
errors = []

print(f"Solving {len(df)} problems...\n")

for _, row in tqdm(df.iterrows(), total=len(df), desc="Solving problems"):
    problem_id = row[id_col]
    problem_text = row["problem_text"]

    # Solve with LLM
    result = llm_solve(problem_text, max_tokens=MAX_TOKENS)

    # Extract and validate answer
    if result.get("error") is None:
        answer = extract_numeric_answer(result["reasoning"])
        answer = validate_answer(answer)
        reasoning_steps.append(result["reasoning"])
    else:
        # Failed problems still get a placeholder answer of 0.
        answer = 0
        reasoning_steps.append(f"Error: {result['error']}")
        errors.append({"problem_id": problem_id, "error": result["error"]})

    predictions.append(answer)

df["predicted_answer"] = predictions

# Failures also append a 0 to `predictions`, so success must be derived from
# the error list rather than from the absence of `None` values.
successful_count = len(df) - len(errors)

print(f"\n{'='*60}")
print(f"Solved {len(df)} problems")
print(f"Successful: {successful_count}")
print(f"Failed: {len(errors)}")
print(f"{'='*60}")

## 11. View Results

In [None]:
# Show the first ten predictions
print("\nPrediction Summary:")
print(df[[id_col, "predicted_answer"]].head(10))

# Show the reasoning recorded for the first problem, truncated to 500 characters
sample_reasoning = reasoning_steps[0]

print(f"\n{'='*60}")
print("Sample Reasoning for Problem 1:")
print(f"{'='*60}")
print(f"Problem: {df['problem_text'].iloc[0]}")
print(f"\nReasoning:")
if len(sample_reasoning) > 500:
    print(sample_reasoning[:500] + "...")
else:
    print(sample_reasoning)
print(f"\nFinal Answer: {df['predicted_answer'].iloc[0]}")

## 12. Save Submission

In [None]:
# Build the two-column submission frame with the expected header names
submission = df[[id_col, "predicted_answer"]].set_axis(
    ["problem_id", "predicted_answer"], axis=1
)

# Write the submission CSV without the DataFrame index
submission_path = os.path.join(OUTPUT_DIR, "submission.csv")
submission.to_csv(submission_path, index=False)

print(f"Submission saved to: {submission_path}")
print(f"\nSubmission Preview:")
print(submission.head(10))

## 13. Save Detailed Logs

In [None]:
# Save reasoning steps as a human-readable log, one section per problem.
# UTF-8 is set explicitly because model output may contain non-ASCII
# characters that the platform default encoding cannot represent.
reasoning_log_path = os.path.join(LOG_DIR, "reasoning_steps.log")
with open(reasoning_log_path, "w", encoding="utf-8") as f:
    for pid, reasoning, answer in zip(df[id_col], reasoning_steps, predictions):
        f.write(f"{'='*60}\n")
        f.write(f"Problem ID: {pid}\n")
        f.write(f"Final Answer: {answer}\n")
        f.write(f"{'='*60}\n")
        f.write(f"{reasoning}\n\n")

print(f"Reasoning steps saved to: {reasoning_log_path}")

# Save detailed predictions as JSON.
# Rows are paired with `reasoning_steps` by position (zip) rather than by the
# DataFrame index label, which is only a valid list index for a default
# RangeIndex. `.tolist()` converts numpy scalars to plain Python values that
# `json.dump` can serialise.
detailed_results = [
    {
        "problem_id": pid,
        "original_problem": original_problem,
        "prepared_problem": prepared_problem,
        "reasoning": reasoning,
        "predicted_answer": int(answer),
    }
    for pid, original_problem, prepared_problem, reasoning, answer in zip(
        df[id_col].tolist(),
        df[input_col].tolist(),
        df["problem_text"].tolist(),
        reasoning_steps,
        df["predicted_answer"].tolist(),
    )
]

detailed_results_path = os.path.join(LOG_DIR, "detailed_results.json")
with open(detailed_results_path, "w", encoding="utf-8") as f:
    json.dump(detailed_results, f, indent=2)

print(f"Detailed results saved to: {detailed_results_path}")

## 14. Summary Statistics

In [None]:
# Summary statistics.
# The non-None predictions are collected once and reused for every statistic.
made_predictions = [p for p in predictions if p is not None]

print("\n" + "="*60)
print("AIMO3 SUBMISSION SUMMARY")
print("="*60)
print(f"\nModel: {MODEL_NAME}")
print(f"Device: {DEVICE}")
print(f"Total Problems: {len(df)}")
print(f"Predictions Made: {len(made_predictions)}")
print(f"Failed Predictions: {len(errors)}")
print(f"\nAnswer Statistics:")
if made_predictions:
    print(f"Min Answer: {min(made_predictions)}")
    print(f"Max Answer: {max(made_predictions)}")
    print(f"Mean Answer: {np.mean(made_predictions):.2f}")
else:
    # min()/max() raise ValueError on an empty list, so report the gap instead.
    print("No predictions available.")
print(f"\nFiles Saved:")
print(f"  - Submission: {submission_path}")
print(f"  - Reasoning: {reasoning_log_path}")
print(f"  - Detailed Results: {detailed_results_path}")
print("\n" + "="*60)