In [1]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["TRITON_PTXAS_PATH"] = "/usr/local/cuda/bin/ptxas"

import re
import random
import warnings
from collections import Counter
import numpy as np
import pandas as pd
import polars as pl
import torch
import vllm
import requests
import json
import hmac
import hashlib
import base64
import urllib.parse
import datetime
import time
import gc
from vllm import LLM, SamplingParams
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import torch._dynamo

warnings.simplefilter('ignore')
print('PyTorch Version:', torch.__version__)
print('vLLM:', vllm.__version__)


INFO 05-05 23:23:29 __init__.py:183] Automatically detected platform cuda.
PyTorch Version: 2.5.1+cu124
vLLM: 0.7.1


In [2]:
# Run-level bookkeeping: no predictions collected, no timer started, nothing scored yet.
all_predictions_df = None
xls_start_time = None
correct_count = 0

# NOTE(review): these are ordinary Python variables, not os.environ entries --
# confirm whether they were meant to be exported as environment variables.
TORCH_LOGS = "+dynamo"
TORCHDYNAMO_VERBOSE = 1

# Turn on TorchDynamo's suppress_errors flag.
torch._dynamo.config.suppress_errors = True

import kaggle_evaluation.aimo_2_inference_server

# System prompts asking for step-by-step reasoning and a final answer inside \boxed{}
# after taking it modulo 1000. prompt1 is the short variant; prompt2 additionally
# instructs the model to re-check its work before answering.
prompt1 = """You are a helpful and harmless assistant. You are Qwen developed by Alibaba. 
You should think step-by-step. Return final answer within \\boxed{}, after taking modulo 1000."""

prompt2 = """You are a the most powerful math expert. Please solve the problems with deep resoning. 
You are careful and always recheck your conduction. You will never give answer directly until you have enough confidence. 
You should think step-by-step. Return final answer within \\boxed{}, after taking modulo 1000."""

# Candidate locations of the labelled reference CSV, checked in order.
possible_paths = [
    '/kaggle/input/competition-data/train.csv',
    # '/root/AIMO2/reference.csv',
]

# Use the first candidate path that exists on disk.
reference_path = None
for path in possible_paths:
    if os.path.exists(path):
        reference_path = path
        break

if not reference_path:
    # Report the paths that were actually searched rather than a fixed file name.
    raise FileNotFoundError(
        f"Cannot find reference data in any of the expected locations: {possible_paths}"
    )

try:
    df_ref = pd.read_csv(reference_path)

    # Both columns are read just below, so validate both up front; otherwise a
    # missing 'label' column would surface as a bare KeyError.
    missing_columns = [col for col in ('Question', 'label') if col not in df_ref.columns]
    if missing_columns:
        raise ValueError(
            f"CSV at {reference_path} missing required columns: {missing_columns}"
        )

    # Lookup from question text to its label (a later duplicate question overwrites an earlier one).
    question_label_map = dict(zip(df_ref['Question'], df_ref['label']))
    print(f"Successfully loaded reference data from {reference_path} with {len(df_ref)} problems")
except Exception as e:
    # Log the reason, then re-raise so the run stops here.
    print(f"Error reading CSV file: {e}")
    raise

Successfully loaded reference data from /kaggle/input/competition-data/train.csv with 10189 problems


In [3]:
# Choose the model directory: the AWQ checkpoint when either Kaggle run
# environment variable is set, the other DeepSeek-R1-Distill-Qwen-7B directory otherwise.
llm_model_7B_pth = (
    '/kaggle/input/deepseek-r1/transformers/deepseek-r1-distill-qwen-7b-awq-casperhansen/1'
    if os.getenv('KAGGLE_KERNEL_RUN_TYPE') or os.getenv('KAGGLE_IS_COMPETITION_RERUN')
    else '/kaggle/input/deepseek-r1-distill-qwen-7b/transformers/deepseek-ai-deepseek-r1-distill-qwen-7b/1'
)

In [4]:
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch RNGs and request deterministic cuDNN behaviour.

    Args:
        seed: Integer seed applied to every random number generator.
    """
    # PYTHONHASHSEED is read at interpreter start-up, so this only affects
    # processes launched after this point, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device rather than only the current one.
    torch.cuda.manual_seed_all(seed)
    # cuDNN autotuning can select different kernels from run to run, which works
    # against the deterministic flag below, so it is switched off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
seed_everything(seed=0)

# Wall-clock budget: the cutoff is 4 h 50 min after this cell starts.
start_time = time.time()
cutoff_time = start_time + (4 * 60 + 50) * 60
# 51 evenly spaced integer timestamps, from the cutoff back to one hour after start.
cutoff_times = [
    int(stamp)
    for stamp in np.linspace(cutoff_time, start_time + 60 * 60, 50 + 1)
]

MAX_NUM_SEQS = 16
MAX_MODEL_LEN = (8192 * 3) // 2

# Build the vLLM engine; note the context window passed is twice MAX_MODEL_LEN.
llm = LLM(
    llm_model_7B_pth,
    max_num_seqs=MAX_NUM_SEQS,
    max_model_len=MAX_MODEL_LEN * 2,
    trust_remote_code=True,
    tensor_parallel_size=4,
    gpu_memory_utilization=0.95,
    seed=random.randint(1, 10000),
)
tokenizer = llm.get_tokenizer()


INFO 05-05 23:24:05 config.py:526] This model supports multiple tasks: {'embed', 'generate', 'score', 'reward', 'classify'}. Defaulting to 'generate'.
INFO 05-05 23:24:09 awq_marlin.py:109] The model is convertible to awq_marlin during runtime. Using awq_marlin kernel.
INFO 05-05 23:24:09 config.py:1383] Defaulting to use mp for distributed inference
INFO 05-05 23:24:09 llm_engine.py:232] Initializing a V0 LLM engine (v0.7.1) with config: model='/kaggle/input/deepseek-r1/transformers/deepseek-r1-distill-qwen-7b-awq-casperhansen/1', speculative_config=None, tokenizer='/kaggle/input/deepseek-r1/transformers/deepseek-r1-distill-qwen-7b-awq-casperhansen/1', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=24576, download_dir=None, load_format=auto, tensor_parallel_size=4, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq_marlin, enforce_eag

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]


[1;36m(VllmWorkerProcess pid=351)[0;0m INFO 05-05 23:24:58 model_runner.py:1116] Loading model weights took 1.3375 GB
INFO 05-05 23:24:58 model_runner.py:1116] Loading model weights took 1.3375 GB
[1;36m(VllmWorkerProcess pid=354)[0;0m INFO 05-05 23:24:58 model_runner.py:1116] Loading model weights took 1.3375 GB
[1;36m(VllmWorkerProcess pid=359)[0;0m INFO 05-05 23:24:58 model_runner.py:1116] Loading model weights took 1.3375 GB
INFO 05-05 23:25:20 worker.py:266] Memory profiling takes 20.87 seconds
[1;36m(VllmWorkerProcess pid=354)[0;0m [1;36m(VllmWorkerProcess pid=351)[0;0m [1;36m(VllmWorkerProcess pid=359)[0;0m INFO 05-05 23:25:20 worker.py:266] Memory profiling takes 20.87 seconds
INFO 05-05 23:25:20 worker.py:266] the current vLLM instance can use total_gpu_memory (22.28GiB) x gpu_memory_utilization (0.95) = 21.16GiB
[1;36m(VllmWorkerProcess pid=351)[0;0m [1;36m(VllmWorkerProcess pid=354)[0;0m [1;36m(VllmWorkerProcess pid=359)[0;0m INFO 05-05 23:25:20 worker.p

Capturing CUDA graph shapes:   0%|          | 0/5 [00:00<?, ?it/s]

INFO 05-05 23:25:25 model_runner.py:1435] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.


Capturing CUDA graph shapes: 100%|██████████| 5/5 [00:05<00:00,  1.05s/it]

[1;36m(VllmWorkerProcess pid=354)[0;0m [1;36m(VllmWorkerProcess pid=351)[0;0m INFO 05-05 23:25:31 model_runner.py:1563] Graph capturing finished in 5 secs, took 0.05 GiB
[1;36m(VllmWorkerProcess pid=359)[0;0m INFO 05-05 23:25:31 model_runner.py:1563] Graph capturing finished in 5 secs, took 0.05 GiB
INFO 05-05 23:25:31 model_runner.py:1563] Graph capturing finished in 5 secs, took 0.05 GiB
INFO 05-05 23:25:31 model_runner.py:1563] Graph capturing finished in 5 secs, took 0.05 GiB
INFO 05-05 23:25:31 llm_engine.py:429] init engine (profile, create kv cache, warmup model) took 32.16 seconds





In [5]:
# Work on a copy so that df_ref itself is not modified by later cells.
df = df_ref.copy()

In [6]:
def create_reasoning_prompt(question: str) -> str:
    """Build the prompt asking for a short structural analysis of a math problem.

    Args:
        question: Problem statement, inserted verbatim under the "Problem:" header.

    Returns:
        The instruction text, a "---" separator, the problem, and a trailing
        "Brief analysis:" cue, separated by newlines.
    """
    prompt_lines = (
        "You are a mathematical assistant helping to analyze the structure of math problems. "
        "Given a question, briefly describe what it is asking and mention the main "
        "mathematical concepts or theorems likely involved.",
        "",
        "Keep your explanation short and focused less than 100 tokens . "
        "Do not solve or classify the problem.",
        "",
        "---",
        "Problem:",
        f"{question}",
        "",
        "Brief analysis:",
    )
    return "\n".join(prompt_lines)

In [7]:
# Generation budget per prompt.
max_tokens = 200

print(f"***** Using max_tokens: {max_tokens}")

# Sampling configuration shared by every generation call in this notebook.
sampling_params = SamplingParams(
    max_tokens=max_tokens,            # cap on generated tokens
    stop=["</think>"],                # generation halts at this string
    temperature=1.0,                  # sampling randomness
    top_p=0.90,                       # nucleus (cumulative-probability) cutoff
    min_p=0.05,                       # minimum token probability to be considered
    skip_special_tokens=True,         # omit special tokens from the returned text
    seed=random.randint(1, 10000),
)

***** Using max_tokens: 200


In [8]:
def generate_reasoning(question):
    """Generate the analysis text for a single question.

    Builds the prompt with create_reasoning_prompt, runs it through the
    module-level `llm` using `sampling_params`, and returns the first
    candidate's text with surrounding whitespace stripped.
    """
    completions = llm.generate([create_reasoning_prompt(question)], sampling_params)
    first_candidate = completions[0].outputs[0]
    return first_candidate.text.strip()

In [9]:
# Separate copy that receives the generated "Reasoning" column in later cells.
df2 = df.copy()

In [10]:
# df2['reasoning'] = df2['Question'].apply(generate_reasoning)

In [11]:
# One prompt per question, in the same order as the rows of df2.
prompts = list(map(create_reasoning_prompt, df2["Question"].tolist()))

In [12]:
# Run all prompts through the engine in a single batched call.
outputs = llm.generate(prompts, sampling_params)

Processed prompts: 100%|██████████| 10189/10189 [34:54<00:00,  4.87it/s, est. speed input: 759.03 toks/s, output: 854.52 toks/s]


In [13]:
# Keep the first candidate of every request, stripped of surrounding whitespace.
reasonings = [
    request_output.outputs[0].text.strip()
    for request_output in outputs
]

In [14]:
# Attach the generated texts as a new column (same order as the prompts built from df2).
df2["Reasoning"] = reasonings

In [15]:
# Keep only the text before the last "." (drops the trailing fragment and that
# period; yields "" when the text contains no period at all).
df2["Reasoning"] = df2["Reasoning"].apply(lambda text: text.rpartition(".")[0])

In [16]:
# Spot-check the generated analysis for one row.
df2.loc[6619, 'Reasoning']

"The problem involves the construction of sequences with specific properties related to power sums. The key is to understand how to maximize $d$ given that the same condition must hold for all $f$ from 1 to $d$.\n\nTo solve this, we can consider the concept of simultaneous equations where each equation corresponds to a different power of $2f-1$. The goal is to ensure that these equations can be satisfied simultaneously by adjusting the values of $x_i$ appropriately.\n\nAnother approach is to analyze the structure of the equations and determine the maximum number of such conditions that can be imposed on the sequence without conflicting with each other.\n\nTo find the maximum $d$, it's necessary to consider the interplay between the increasing nature of the $x_i$ sequence and the constraints imposed by the power sums equalling 1 for each $f$.\n\nLooking at the problem, I notice that the exponents are odd numbers: 1, 3, 5, .."

In [17]:
# Same prompt -> generate -> trim pipeline, applied to the test split.
test = pd.read_csv('/kaggle/input/competition-data/test.csv')
prompts = [create_reasoning_prompt(problem) for problem in test["Question"].tolist()]
outputs = llm.generate(prompts, sampling_params)
reasonings = [completion.outputs[0].text.strip() for completion in outputs]
test["Reasoning"] = reasonings
# Keep only the text before the last "." ("" when there is no period).
test["Reasoning"] = test["Reasoning"].apply(lambda text: text.rpartition(".")[0])

Processed prompts: 100%|██████████| 3044/3044 [10:24<00:00,  4.87it/s, est. speed input: 764.42 toks/s, output: 853.18 toks/s]


In [18]:
# Write both frames, including the "Reasoning" column, without the index.
df2.to_csv('train_reasoning.csv', index=False)
test.to_csv('test_reasoning.csv', index=False)