In [None]:
# GPT-OSS-20B with TIR and Time Slicing
# Based on winning AIMO approaches

import os
import sys

os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import polars as pl
import re
import subprocess
import tempfile
import time
from typing import Optional, List, Dict, Tuple
from collections import Counter
import kaggle_evaluation.aimo_3_inference_server

# DEBUG: show which datasets/models are attached to this session.
# Handle the "not on Kaggle" case first, then list each entry of the input dir.
print("Available inputs in /kaggle/input/:")
if not os.path.exists('/kaggle/input/'):
    print("  /kaggle/input/ does not exist (running locally)")
else:
    for item in os.listdir('/kaggle/input/'):
        print(f"  - {item}")

In [None]:
def execute_python_code(code: str, timeout: int = 10) -> Tuple[Optional[int], str]:
    """Run a Python snippet in a child process and read an integer answer from its stdout.

    The snippet is written to a temporary ``.py`` file and executed with the
    interpreter that is running this notebook (falling back to ``python3``).
    This is process isolation only, not a security sandbox: the snippet runs
    with the same privileges as the notebook.

    Args:
        code: Python source to execute.
        timeout: Maximum run time in seconds; the child is killed afterwards.

    Returns:
        A ``(answer, output)`` tuple.  ``answer`` is the last standalone
        1-5 digit integer found on the last stdout line that contains one, or
        ``None`` when the script exited with a non-zero status, timed out, or
        printed no such number.  ``output`` is stdout followed by stderr, or
        the exception text when the run could not be completed.
    """
    temp_file = None
    try:
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.py', delete=False, encoding='utf-8'
        ) as f:
            # Record the name before writing so a failed write is still cleaned up.
            temp_file = f.name
            f.write(code)

        result = subprocess.run(
            [sys.executable or 'python3', temp_file],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        output = result.stdout + result.stderr

        # A crashed script has no trustworthy answer.  Parsing its combined
        # output used to turn the traceback's "line N" into the answer N.
        if result.returncode != 0:
            return None, output

        # Only stdout is searched: warnings on stderr may contain numbers too.
        for line in reversed(result.stdout.strip().split('\n')):
            numbers = re.findall(r'\b(\d{1,5})\b', line)
            if numbers:
                # 1-5 digits always parse to an int in 0..99999.
                return int(numbers[-1]), output

        return None, output
    except Exception as e:
        # Includes subprocess.TimeoutExpired; callers only need "no answer".
        return None, str(e)
    finally:
        if temp_file is not None:
            try:
                os.unlink(temp_file)
            except OSError:
                # Best-effort cleanup; never mask the real result.
                pass

def extract_code_from_response(text: str) -> Optional[str]:
    """Return the last fenced Python code block in a model response.

    Fences are consumed as opening/closing pairs, so the prose between two
    blocks can never be mistaken for code.  Blocks tagged ``python``, ``py``
    or ``python3`` (case-insensitive) are preferred; if there are none, the
    last untagged block is used.  Blocks tagged with any other language and
    blocks that are empty after stripping are ignored.

    Args:
        text: Raw model output.

    Returns:
        The stripped source of the selected block, or ``None`` if the response
        contains no usable block.
    """
    python_blocks = []
    untagged_blocks = []

    # Group 1 is the info string after the opening fence, group 2 the body.
    for tag, body in re.findall(r'```([^\n`]*)\n(.*?)```', text, re.DOTALL):
        body = body.strip()
        if not body:
            continue
        tag = tag.strip().lower()  # strip() also drops a '\r' from CRLF input
        if tag in ('python', 'py', 'python3'):
            python_blocks.append(body)
        elif not tag:
            untagged_blocks.append(body)

    if python_blocks:
        return python_blocks[-1]
    if untagged_blocks:
        return untagged_blocks[-1]
    return None

def extract_answer(text: str) -> Optional[int]:
    """Extract a 0-99999 integer answer from free-form model output.

    Patterns are tried in priority order (``\\boxed{}``, ``####``, "answer
    is", ``= N``, "N is the answer", "therefore ... N").  The first pattern
    that matches anywhere wins, and its last match is returned.  If none
    match, the last standalone number in the final 500 characters is used.

    Every number must be a complete 1-5 digit integer: it may not be a slice
    of a longer digit run or one side of a decimal.  Without that, "answer is
    123456" would be read as 12345 and "= 3.75" as 3.

    Args:
        text: Raw model output.

    Returns:
        The extracted answer, or ``None`` if no acceptable number is found.
    """
    # 1-5 digits, not preceded by a digit/decimal point and not followed by
    # another digit or a fractional part.  A trailing sentence period is fine.
    number = r'(?<![\d.])(\d{1,5})(?!\d|\.\d)'

    patterns = [
        r'\\boxed\{\s*' + number + r'\s*\}',
        r'#### ' + number,
        r'(?:final answer|answer|result|solution)(?:\s+is)?:?\s*' + number,
        r'=\s*' + number + r'(?:\s|$|\.|,)',
        number + r'(?:\s+(?:is the|as the) answer)',
        r'therefore.*?' + number,
    ]

    for pattern in patterns:
        matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL)
        if matches:
            # 1-5 digits always parse to an int in 0..99999.
            return int(matches[-1])

    # Last resort: any standalone 1-5 digit number near the end of the text.
    tail = text[-500:]
    numbers = re.findall(r'(?<![\w.])(\d{1,5})(?!\w|\.\d)', tail)
    if numbers:
        return int(numbers[-1])

    return None

def validate_answer(answer: int) -> bool:
    """Tell whether ``answer`` is an ``int`` within the accepted range 0..99999."""
    if not isinstance(answer, int):
        return False
    return 0 <= answer <= 99999

In [None]:
class TimeSlicingSolver:
    """GPT-OSS-20B solver with time slicing and TIR.

    Each problem is attempted with several prompting strategies ("time
    slices"), each sampled ``num_samples`` times.  When a response contains
    Python code, that code is executed and its printed result is preferred
    over the answer written in the text (Tool-Integrated Reasoning).  The
    final answer is the majority vote over all valid attempts.

    The model is loaded lazily on the first solve call.  A load is attempted
    at most once per instance: if the model is missing or fails to load, later
    calls return immediately instead of repeating the load for every problem.
    """

    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.model_loaded = False
        # True once a load has been tried, whether or not it succeeded.
        self._load_attempted = False

        # One entry per strategy.  'max_tokens' and 'temp' control generation.
        # 'timeout' bounds only the execution of model-written code; generation
        # itself has no time limit.
        self.time_slices = [
            {'name': 'quick_solve', 'max_tokens': 1024, 'temp': 0.3, 'timeout': 5},
            {'name': 'deep_reasoning', 'max_tokens': 2048, 'temp': 0.5, 'timeout': 10},
            {'name': 'creative_search', 'max_tokens': 2048, 'temp': 0.8, 'timeout': 10},
            {'name': 'code_focused', 'max_tokens': 1536, 'temp': 0.4, 'timeout': 8},
        ]

    def _load_model(self):
        """Load the GPT-OSS-20B tokenizer and model, at most once.

        Candidate directories are checked for a ``config.json`` before the
        heavy ``torch``/``transformers`` imports, so a missing model is
        reported cheaply.  On success ``self.model``, ``self.tokenizer`` and
        ``self.model_loaded`` are set.  On any failure the error is printed
        and ``self.model_loaded`` stays False.
        """
        # getattr keeps this safe for instances created without __init__.
        if self.model_loaded or getattr(self, '_load_attempted', False):
            return
        self._load_attempted = True

        try:
            # Try Kaggle model input paths for GPT-OSS-20B
            # Format: /kaggle/input/{model-handle}/transformers/{version}
            possible_paths = [
                '/kaggle/input/gpt-oss-20b/transformers/default/1',
                '/kaggle/input/gpt-oss-20b/transformers',
                '/kaggle/input/gpt-oss-20b',
            ]

            model_path = None
            for path in possible_paths:
                print(f"Checking: {path}")
                if os.path.exists(path):
                    # Only a directory holding config.json is a loadable model.
                    if os.path.exists(os.path.join(path, 'config.json')):
                        model_path = path
                        print(f"  ✓ Found config.json")
                        break
                    else:
                        print(f"  ✗ No config.json")

            if not model_path:
                print("\nGPT-OSS-20B not found in Kaggle inputs")
                print("Add model via metadata: danielhanchen/gpt-oss-20b/Transformers/default/1")
                print(f"Searched paths: {possible_paths}")
                return

            # Imported here so the notebook still runs where these are absent.
            import torch
            from transformers import AutoTokenizer, AutoModelForCausalLM

            print(f"\nLoading GPT-OSS-20B from {model_path}...")
            print("Model is natively MXFP4 quantized (21B params, 3.6B active)")

            self.tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                use_fast=True,
                trust_remote_code=True
            )

            # NOTE(review): ~20B parameters held fully in bf16 would need about
            # 40 GB, so fitting on a small GPU presumably relies on the
            # checkpoint's MXFP4 weights staying quantized -- confirm.
            self.model = AutoModelForCausalLM.from_pretrained(
                model_path,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                trust_remote_code=True,
                low_cpu_mem_usage=True,
            )

            self.model.eval()
            self.model_loaded = True

            print(f"✓ GPT-OSS-20B loaded successfully")

        except Exception as e:
            print(f"Failed to load GPT-OSS-20B: {e}")
            import traceback
            traceback.print_exc()

    def _get_prompt_for_slice(self, problem: str, slice_config: Dict) -> str:
        """Build the prompt for one strategy.

        Args:
            problem: Problem statement text.
            slice_config: One entry of ``self.time_slices``; only its
                ``'name'`` is read.  Unknown names get the code-focused prompt.

        Returns:
            The full user prompt, always asking for a ``\\boxed{answer}``.
        """
        strategy = slice_config['name']

        if strategy == 'quick_solve':
            return f"""Solve this IMO problem quickly and concisely.

Problem: {problem}

Give a direct solution with the final answer as \\boxed{{answer}}."""

        if strategy == 'deep_reasoning':
            return f"""Solve this International Mathematical Olympiad problem with deep reasoning.

Problem:
{problem}

Think step-by-step:
1. Understand what's being asked
2. Identify the mathematical domain (algebra, number theory, combinatorics, geometry)
3. Apply relevant theorems and techniques
4. Work through the solution systematically
5. Verify your answer makes sense

Final answer (0-99999) in \\boxed{{answer}}."""

        if strategy == 'creative_search':
            return f"""Explore multiple approaches to solve this IMO problem.

Problem: {problem}

Try different strategies:
- Direct computation
- Pattern recognition
- Working backwards
- Special cases or symmetry

Show your work and give final answer as \\boxed{{answer}}."""

        # code_focused (also the fallback for any other name)
        return f"""Solve this IMO problem using Python code when helpful.

Problem:
{problem}

You can write Python code in ```python blocks to:
- Perform calculations
- Check patterns
- Verify solutions
- Solve equations

Show reasoning and code, then give final answer as \\boxed{{answer}}."""

    def _solve_with_time_slice(self, problem: str, slice_config: Dict) -> Optional[int]:
        """Run one sampled generation for one strategy and extract an answer.

        Args:
            problem: Problem statement text.
            slice_config: One entry of ``self.time_slices``.

        Returns:
            A validated integer answer, or ``None`` if the model is not
            loaded, no valid answer could be extracted, or any error occurred
            (errors are printed, truncated to 50 characters).
        """
        if not self.model_loaded:
            return None

        try:
            import torch

            prompt = self._get_prompt_for_slice(problem, slice_config)

            # Use chat template if available
            if hasattr(self.tokenizer, 'chat_template') and self.tokenizer.chat_template:
                messages = [{"role": "user", "content": prompt}]
                text = self.tokenizer.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=True
                )
            else:
                text = prompt

            inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)

            start_time = time.time()

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=slice_config['max_tokens'],
                    temperature=slice_config['temp'],
                    do_sample=True,
                    top_p=0.9,
                    top_k=50,
                    pad_token_id=self.tokenizer.eos_token_id,
                )

            elapsed = time.time() - start_time

            # Decode only the newly generated tokens, not the echoed prompt.
            response = self.tokenizer.decode(
                outputs[0][inputs['input_ids'].shape[1]:],
                skip_special_tokens=True
            )

            # Try Tool-Integrated Reasoning first
            code = extract_code_from_response(response)
            answer = None

            if code:
                code_answer, _ = execute_python_code(code, timeout=slice_config['timeout'])
                if code_answer is not None:
                    answer = code_answer

            # Fallback to text extraction
            if answer is None:
                answer = extract_answer(response)

            if answer is not None and validate_answer(answer):
                print(f"  [{slice_config['name']}] → {answer} ({elapsed:.1f}s)")
                return answer
            else:
                print(f"  [{slice_config['name']}] → no valid answer ({elapsed:.1f}s)")

            return None

        except Exception as e:
            print(f"  [{slice_config['name']}] error: {str(e)[:50]}")
            return None

    @staticmethod
    def _vote_is_decided(vote_counts: Counter, remaining_attempts: int) -> bool:
        """Return True when no remaining attempts could change the vote winner."""
        top_two = vote_counts.most_common(2)
        if not top_two:
            return False
        runner_up_votes = top_two[1][1] if len(top_two) > 1 else 0
        # Strictly greater: a tie could still go to an answer that was seen earlier.
        return top_two[0][1] - runner_up_votes > remaining_attempts

    def solve_with_time_slicing(self, problem: str, num_samples: int = 3) -> Optional[int]:
        """Solve using every strategy with self-consistency voting.

        Each strategy in ``self.time_slices`` is sampled up to ``num_samples``
        times.  Sampling stops early once the leading answer is ahead by more
        votes than there are attempts left.  The winner is then fixed, so the
        returned answer is the same as if all attempts had been run.

        Args:
            problem: Problem statement text.
            num_samples: Attempts per strategy.

        Returns:
            The most common valid answer (ties go to the answer seen first),
            or ``None`` if the model is unavailable or no attempt produced a
            valid answer.
        """
        self._load_model()

        if not self.model_loaded:
            return None

        vote_counts = Counter()
        total_attempts = len(self.time_slices) * num_samples
        attempts_done = 0
        decided = False

        print("  Running time-sliced inference...")

        # Run each time slice strategy
        for slice_config in self.time_slices:
            # Multiple samples per strategy for self-consistency
            for _ in range(num_samples):
                answer = self._solve_with_time_slice(problem, slice_config)
                attempts_done += 1
                if answer is not None:
                    vote_counts[answer] += 1

                if self._vote_is_decided(vote_counts, total_attempts - attempts_done):
                    decided = True
                    break
            if decided:
                if attempts_done < total_attempts:
                    print(f"  Early stop: vote decided after {attempts_done}/{total_attempts} attempts")
                break

        if not vote_counts:
            return None

        # Voting with consensus
        total_votes = sum(vote_counts.values())
        best_answer, best_count = vote_counts.most_common(1)[0]

        print(f"  Total samples: {total_votes}")
        print(f"  Votes: {dict(vote_counts.most_common(3))}")
        print(f"  Consensus: {best_answer} ({best_count}/{total_votes})")

        return best_answer

    def solve_problem(self, problem_id: str, problem_text: str) -> int:
        """Main solving interface.

        Args:
            problem_id: Problem identifier (accepted for the caller's
                convenience; not used in solving).
            problem_text: Problem statement text.

        Returns:
            An integer answer; 0 when no answer could be produced.  A value
            that fails validation is folded into range via ``abs(x) % 100000``.
        """
        answer = self.solve_with_time_slicing(problem_text, num_samples=3)

        if answer is None:
            answer = 0

        if not validate_answer(answer):
            answer = abs(answer) % 100000

        return answer

# Module-level solver instance used by predict(). Constructing it does not load
# the model; loading is triggered from solve_with_time_slicing().
solver = TimeSlicingSolver()

In [None]:
# Startup banner describing the configured approach, emitted as one print call
# with one argument per output line.
print(
    "=" * 70,
    "GPT-OSS-20B with Time Slicing & TIR",
    "=" * 70,
    "Approach:",
    "  ✓ GPT-OSS-20B (21B params, 3.6B active)",
    "  ✓ Time Slicing: 4 strategies × 3 samples = 12 attempts",
    "  ✓ Tool-Integrated Reasoning (Python execution)",
    "  ✓ Self-consistency voting",
    "",
    "Time Slice Strategies:",
    "  1. Quick Solve (1024 tokens, temp=0.3, 5s timeout)",
    "  2. Deep Reasoning (2048 tokens, temp=0.5, 10s timeout)",
    "  3. Creative Search (2048 tokens, temp=0.8, 10s timeout)",
    "  4. Code Focused (1536 tokens, temp=0.4, 8s timeout)",
    "=" * 70,
    sep="\n",
)

def predict(id_: pl.Series, problem: pl.Series) -> pl.DataFrame:
    """Answer one problem for the inference server.

    Reads the first element of ``id_`` and ``problem``, asks the module-level
    ``solver`` for an answer, and returns a one-row frame with the columns
    ``"id"`` and ``"answer"``.  If anything raises, the error is printed and
    the row carries the answer 0 instead.
    """
    try:
        qid = id_.item(0)
        statement = problem.item(0)
        predicted = solver.solve_problem(qid, statement)
    except Exception as e:
        print(f"Prediction error: {e}")
        return pl.DataFrame({"id": [id_.item(0)], "answer": [0]})
    else:
        try:
            return pl.DataFrame({"id": [qid], "answer": [predicted]})
        except Exception as e:
            # Building the result frame is covered by the same fallback.
            print(f"Prediction error: {e}")
            return pl.DataFrame({"id": [id_.item(0)], "answer": [0]})

# Wrap predict() in the competition's inference server.
inference_server = kaggle_evaluation.aimo_3_inference_server.AIMO3InferenceServer(predict)

# Mode is selected by the KAGGLE_IS_COMPETITION_RERUN environment variable:
# unset/empty -> run the local gateway on test.csv, otherwise serve requests.
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    print("\n[LOCAL MODE] Running local gateway on test.csv...")
    inference_server.run_local_gateway(
        ('/kaggle/input/ai-mathematical-olympiad-progress-prize-3/test.csv',)
    )
else:
    print("\n[COMPETITION MODE] Starting inference server...")
    inference_server.serve()