In [1]:
!pip install --no-index --find-links=/kaggle/input/bitsandbytes-offline/bitsandbytes_pkg bitsandbytes

Looking in links: /kaggle/input/bitsandbytes-offline/bitsandbytes_pkg
Processing /kaggle/input/bitsandbytes-offline/bitsandbytes_pkg/bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.3


In [2]:
!pip install transformers torch



In [3]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import re

In [4]:
device_map = {0: "cuda:0", 1: "cuda:1", 2: "cuda:2", 3: "cuda:3"}

In [5]:
path_CoT1 = "/kaggle/input/m/qwen-lm/qwen2.5-math/transformers/7b-instruct/1"
path_CoT2 = "/kaggle/input/deepseek-math-7b-rl/transformers/7b-rl/1"
path_TIR = "/kaggle/input/deepseek-math-7b-instruct/transformers/main/1"
path_PRM = "/kaggle/input/qwen2.5-math/transformers/qwen2.5-math-prm-7b/1"

In [6]:
# Configuração de quantização para economizar memória
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

In [7]:
# Carregar os modelos nas respectivas GPUs
model_cot1 = AutoModelForCausalLM.from_pretrained(
    path_CoT1,
    device_map=device_map[0],
    quantization_config=bnb_config,
    local_files_only=True
)
tokenizer_cot1 = AutoTokenizer.from_pretrained(path_CoT1)

model_cot2 = AutoModelForCausalLM.from_pretrained(
    path_CoT2,
    device_map=device_map[1],
    quantization_config=bnb_config,
    local_files_only=True
)
tokenizer_cot2 = AutoTokenizer.from_pretrained(path_CoT2)

model_tir = AutoModelForCausalLM.from_pretrained(
    path_TIR,
    device_map=device_map[2],
    quantization_config=bnb_config,
    local_files_only=True
)
tokenizer_tir = AutoTokenizer.from_pretrained(path_TIR)

model_prm = AutoModelForCausalLM.from_pretrained(
    path_PRM,
    device_map=device_map[3],
    quantization_config=bnb_config,
    local_files_only=True
)
tokenizer_prm = AutoTokenizer.from_pretrained(path_PRM)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [8]:
asci = """
  _____ _            _____           _            _   _        _____                
 |_   _| |__   ___  |  ___|_ _ _ __ | |_ __ _ ___| |_(_) ___  |  ___|__  _   _ _ __ 
   | | | '_ \ / _ \ | |_ / _` | '_ \| __/ _` / __| __| |/ __| | |_ / _ \| | | | '__|
   | | | | | |  __/ |  _| (_| | | | | || (_| \__ \ |_| | (__  |  _| (_) | |_| | |   
   |_| |_| |_|\___| |_|  \__,_|_| |_|\__\__,_|___/\__|_|\___| |_|  \___/ \__,_|_|   
                                                                                    
"""

In [9]:

# Função para extrair a resposta numérica final
def extract_answer(text):
    # Prioridade 1: Número dentro de \boxed{}
    boxed_match = re.search(r'\\boxed\{(\d+\.?\d*)\}', text)
    if boxed_match:
        return boxed_match.group(1)
    # Prioridade 2: Último número após "total" ou "soma"
    total_match = re.findall(r'(?:total|soma)[^\d]*(\d+\.?\d*)', text, re.IGNORECASE)
    if total_match:
        return total_match[-1]
    # Fallback: Último número no texto
    match = re.findall(r'(\d+\.?\d*)', text)
    return match[-1] if match else None



# Função para gerar solução CoT
def generate_cot_solution(model, tokenizer, problem, device):
    prompt = (
        f"Solve the following problem step by step: {problem}\n"
        "Provide the final answer in the format \\boxed{number}."
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=1024, temperature=0.7, do_sample=True)
    solution = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return solution

# Função para validar solução com TIR
import re

def validate_with_tir(solution, problem):
    # Prompt claro e estruturado
    prompt = (
        f"Analyze this solution: {solution}\n"
        f"Extract the final numerical answer (e.g., '5' from '\\boxed{{5}}'). "
        f"Verify if it correctly solves the problem: {problem}. "
        f"Return 'Validation: VALID' if correct, or 'Validation: INVALID' if incorrect, "
        f"followed by a brief explanation (max 20 words). "
        f"Format: 'Validation: [VALID/INVALID]\nExplanation: [reason]'."
    )
    
    # Geração da resposta com mais tokens
    inputs = tokenizer_tir(prompt, return_tensors="pt").to(device_map[2])
    outputs = model_tir.generate(**inputs, max_new_tokens=200, temperature=0.5, do_sample=True)
    validation = tokenizer_tir.decode(outputs[0], skip_special_tokens=True)
    
    # Depuração: exibir a saída bruta
    print(f"Raw TIR output: {validation}")
    
    # Filtro flexível com regex
    match = re.search(r'Validation:\s*(VALID|INVALID)', validation, re.IGNORECASE)
    if match:
        validation_status = match.group(1).upper()
        explanation_match = re.search(r'Explanation:\s*(.+)', validation, re.IGNORECASE)
        explanation = explanation_match.group(1) if explanation_match else "Explicação não fornecida."
        return f"Validation: {validation_status}\nExplanation: {explanation}"
    return "Validation: Unable to validate due to missing or unclear response."


# Função para selecionar a melhor solução com PRM
def select_best_solution(solutions_with_validations):
    prompt = (
        "Choose the best solution based on correctness and clarity, prioritizing solutions with 'Validation: VALID' and a clear explanation.\n"
        + "\n".join([f"Solution {i+1}: {sol}\nValidation: {val}" for i, (sol, val) in enumerate(solutions_with_validations)])
        + "\nRespond with the number of the best solution (e.g., '1' or '2')."
    )
    inputs = tokenizer_prm(prompt, return_tensors="pt").to(device_map[3])
    outputs = model_prm.generate(**inputs, max_new_tokens=10, temperature=0.5, do_sample=True)
    selection = tokenizer_prm.decode(outputs[0], skip_special_tokens=True).strip()
    
    match = re.search(r'\b(\d+)\b', selection)
    if match:
        return int(match.group(1)) - 1
    return 0  # Default para a primeira solução
  # Default para a primeira solução se não conseguir extrair

# Função principal para resolver o problema
def solve_problem(problem):
    import torch
    # Gerar duas soluções CoT independentes
    solution1 = generate_cot_solution(model_cot1, tokenizer_cot1, problem, device_map[0])
    torch.cuda.empty_cache()
    solution2 = generate_cot_solution(model_cot2, tokenizer_cot2, problem, device_map[1])
    torch.cuda.empty_cache()
    
    # Validar ambas as soluções com TIR
    validation1 = validate_with_tir(solution1, problem)
    torch.cuda.empty_cache()
    validation2 = validate_with_tir(solution2, problem)
    torch.cuda.empty_cache()
    
    # Preparar as soluções com suas validações
    solutions_with_validations = [
        (solution1, validation1),
        (solution2, validation2)
    ]
    
    # Selecionar a melhor solução com PRM
    best_idx = select_best_solution(solutions_with_validations)
    best_solution, best_validation = solutions_with_validations[best_idx]
    
    # Extrair a resposta numérica final
    final_answer = extract_answer(best_solution)
    torch.cuda.empty_cache()

    print(asci)
    print(f"Better Solution:\n{best_solution}")
    print(f"\n\nValidation:\n{best_validation}")
    print(f"\nResposta Final: {final_answer}")
    print("\n" + "="*50 + "\n")
    
    return final_answer

In [10]:
# Exemplo de uso
problem = "Find the sum of the squares of all odd numbers less than 20."
print(solve_problem(problem))

Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


Raw TIR output: Analyze this solution: Solve the following problem step by step: Find the sum of the squares of all odd numbers less than 20.
Provide the final answer in the format \boxed{number}. To find the sum of the squares of all odd numbers less than 20, we first list all the odd numbers less than 20. These numbers are: 1, 3, 5, 7, 9, 11, 13, 15, 17, and 19.

Next, we square each of these numbers:
\[
1^2 = 1
\]
\[
3^2 = 9
\]
\[
5^2 = 25
\]
\[
7^2 = 49
\]
\[
9^2 = 81
\]
\[
11^2 = 121
\]
\[
13^2 = 169
\]
\[
15^2 = 225
\]
\[
17^2 = 289
\]
\[
19^2 = 361
\]

Now, we sum these squares:
\[
1 + 9 + 25 + 49 + 81 + 121 + 169 + 225 + 289 + 361
\]

We can add these numbers step by step:
\[
1 + 9 = 10
\]
\[
10 + 25 = 35
\]
\[
35 + 49 = 84
\]
\[
84 + 81 = 165
\]
\[
165 + 121 = 286
\]
\[
286 + 169 = 455
\]
\[
455 + 225 = 680
\]
\[
680 + 289 = 969
\]
\[
969 + 361 = 1330
\]

Therefore, the sum of the squares of all odd numbers less than 20 is \boxed{1330}.
Extract the final numerical answer (e.g.

In [11]:
import os

import pandas as pd
import polars as pl

import kaggle_evaluation.aimo_2_inference_server

In [12]:
# Função predict para o servidor de inferência
def predict(id_: pl.DataFrame, question: pl.DataFrame) -> pl.DataFrame:
    # Extrair valores dos DataFrames
    id_ = id_.item(0)
    question = question.item(0)
    
    # Resolver o problema com seu modelo
    answer = solve_problem(question)
    
    # Garantir que a resposta seja um inteiro entre 0 e 999
    try:
        answer = int(float(answer)) % 1000
    except ValueError:
        answer = 0  # Fallback caso a extração falhe
    
    # Retornar o DataFrame no formato exigido
    return pl.DataFrame({'id': id_, 'answer': answer})

In [13]:
# Configurar o servidor de inferência
inference_server = kaggle_evaluation.aimo_2_inference_server.AIMO2InferenceServer(predict)

# Iniciar o servidor ou rodar localmente
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        ('/kaggle/input/ai-mathematical-olympiad-progress-prize-2/test.csv',)
    )

Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


Raw TIR output: Analyze this solution: Solve the following problem step by step: What is $1-1$?
Provide the final answer in the format \boxed{number}. To solve the problem \(1 - 1\) step by step, let's break it down:

1. Identify the numbers involved in the subtraction: the minuend is 1 and the subtrahend is 1.
2. Subtract the subtrahend from the minuend: \(1 - 1 = 0\).

So, the final answer is \(\boxed{0}\).
Extract the final numerical answer (e.g., '5' from '\boxed{5}'). Verify if it correctly solves the problem: What is $1-1$?. Return 'Validation: VALID' if correct, or 'Validation: INVALID' if incorrect, followed by a brief explanation (max 20 words). Format: 'Validation: [VALID/INVALID]
Explanation: [reason]'.
The answer is $\boxed{0}$.
Raw TIR output: Analyze this solution: Solve the following problem step by step: What is $1-1$?
Provide the final answer in the format \boxed{number}.
To solve the problem, we simply subtract 1 from 1: $1 - 1 = 0$.
The answer is $\boxed{0}$.
Extract

Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


Raw TIR output: Analyze this solution: Solve the following problem step by step: Solve $4+x=4$ for $x$.
Provide the final answer in the format \boxed{number}. To solve the equation \(4 + x = 4\) for \(x\), we need to isolate \(x\) on one side of the equation. Here are the steps to do that:

1. Start with the given equation:
   \[
   4 + x = 4
   \]

2. To isolate \(x\), we need to subtract 4 from both sides of the equation. This will cancel out the 4 on the left side:
   \[
   4 + x - 4 = 4 - 4
   \]

3. Simplify both sides of the equation:
   \[
   x = 0
   \]

So, the solution to the equation \(4 + x = 4\) is \(\boxed{0}\).
Extract the final numerical answer (e.g., '5' from '\boxed{5}'). Verify if it correctly solves the problem: Solve $4+x=4$ for $x$.. Return 'Validation: VALID' if correct, or 'Validation: INVALID' if incorrect, followed by a brief explanation (max 20 words). Format: 'Validation: [VALID/INVALID]
Explanation: [reason]'.
The answer is $\boxed{0}$.
Raw TIR output: Anal

Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


Raw TIR output: Analyze this solution: Solve the following problem step by step: What is $0\times10$?
Provide the final answer in the format \boxed{number}. To solve the problem \(0 \times 10\), we can follow these steps:

1. Understand the multiplication operation: Multiplication is essentially repeated addition. When we multiply a number \(a\) by a number \(b\), we add \(a\) to itself \(b\) times.
2. Apply the definition to our specific problem: Here, we need to multiply 0 by 10. This means we add 0 to itself 10 times.
3. Perform the addition: Adding 0 to itself any number of times will always result in 0. Therefore, \(0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 = 0\).
4. Write the final answer: The result of \(0 \times 10\) is \(\boxed{0}\).

Thus, the final answer is \(\boxed{0}\).
Extract the final numerical answer (e.g., '5' from '\boxed{5}'). Verify if it correctly solves the problem: What is $0\times10$?. Return 'Validation: VALID' if correct, or 'Validation: INVALID' if incorrect, f