In [1]:
# credits:
# https://www.kaggle.com/code/olyatsimboy/aimo-openmath-mistral-baseline
# https://www.kaggle.com/code/aatiffraz/prompt-prediction-w-mixtral-mistral7b-gemma-llama
# https://www.kaggle.com/code/thedrcat/aimo-mixtral-baseline

# Zero-shot MMOS-DeepSeekMath-7B with self-consistency and generated code reasoning evaluation

Self-consistency is a modification of the standard greedy decoding in reasoning pipelines via sampling several diverse answers followed by aggregation, e.g., most common answer ([SC-CoT paper](https://arxiv.org/pdf/2203.11171.pdf)).

In this kernel, we will consider MMOS-DeepSeekMath-7B RL-tuned backbone; in my experiments, this model produces more consistent code reasoning and the code block execution will allow us to decrease arithmetic hallucinations.

In [2]:
!pip install -U /kaggle/input/bitsandbytes-0-42-0-py3-none-any-whl/bitsandbytes-0.42.0-py3-none-any.whl -qq

In [3]:
import torch
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig, 
    AutoConfig,
    set_seed
)

set_seed(42)

MODEL_PATH = "/kaggle/input/deepseek-math"

quantization_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

config = AutoConfig.from_pretrained(MODEL_PATH)
config.gradient_checkpointing = True


tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
#     quantization_config=quantization_config,
    config=config
)

2024-04-08 11:05:43.817447: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-08 11:05:43.817565: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-08 11:05:44.004383: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
model.dtype

torch.bfloat16

In [5]:
import pandas as pd
from tqdm import tqdm
PRIVATE = True

df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/test.csv')
df.head()

Unnamed: 0,id,problem
0,000aaa,What is $1-1$?
1,111bbb,What is $0\times10$?
2,222ccc,Solve $4+x=4$ for $x$.


In [6]:
if len(df) < 5:
    df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
    PRIVATE = False
df.head()

Unnamed: 0,id,problem,answer
0,229ee8,"Let $k, l > 0$ be parameters. The parabola $y ...",52
1,246d26,Each of the three-digits numbers $111$ to $999...,250
2,2fc4ad,Let the `sparkle' operation on positive intege...,702
3,430b63,What is the minimum value of $5x^2+5y^2-8xy$ w...,800
4,5277ed,There exists a unique increasing geometric seq...,211


In [7]:
import gc
device = 'cuda'

In [8]:
def naive_parse(answer):
    out = []
    start = False
    end = False
    for l in reversed(list(answer)):
        if l in '0123456789' and not end:
            start = True
            out.append(l)
        else:
            if start:
                end = True
        
    out = reversed(out)
    return ''.join(out)

In [9]:
import transformers

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype='auto',
    device_map="auto",
)

In [10]:
print(f"Transformers Version: {transformers.__version__}")

Transformers Version: 4.38.2


In [11]:
import torch

torch.backends.cuda.enable_mem_efficient_sdp(False)

In [12]:
import re
import sys
import subprocess


def process_output(output):
    result = output
    
    try:
        code = output.split('```')[1][7:]

        with open('code.py', 'w') as fout:
            fout.write(code)

        batcmd = 'timeout 7 ' + sys.executable + ' code.py'
        try:
            shell_output = subprocess.check_output(batcmd, shell=True).decode('utf8')
            print(shell_output)
            code_output = round(float(eval(shell_output))) % 1000
        except:
            code_output = -1

        print('CODE RESULTS', code_output)
    
    except Exception as e:
        print(e)
        print('ERROR PARSING')
        code_output = -1
    
    try:
        result_output = re.findall(r'\\boxed\{(.*)\}', result)

        print('BOXED', result_output)
        if not len(result_output):
            result_output = naive_parse(result)
        else:
            result_output = result_output[-1]

        print('BOXED', result_output)
        if not len(result_output):
            result_output = -1
        
        else:
            result_output = round(float(eval(result_output))) % 1000
    
    except Exception as e:
        print(e)
        print('ERROR PARSING')
        result_output = -1
    
    return result_output, code_output

In [13]:
import re
from collections import defaultdict


tool_instruction0 = " The answer should be given as a non-negative modulo 1000."
tool_instruction0 += '\nPlease integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.'
tool_instruction1 = "\nThe answer should be given as a non-negative modulo 1000."
tool_instruction1 += '\nPlease integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.'
tool_instruction2 = " The answer should be given as a non-negative modulo 1000."
tool_instruction2 += '\n\nPlease integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.'
tool_instruction3 = "\nThe answer should be given as a non-negative modulo 1000."
tool_instruction3 += '\n\nPlease integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.'
tool_instruction4 = " The answer should be given as a non-negative modulo 1000."
tool_instruction4 += '\nPlease integrate natural language reasoning with python programs to solve the problem above, and put your final answer within \\boxed{}.'

tool_instructions = [tool_instruction0, tool_instruction1, tool_instruction2, tool_instruction3, tool_instruction4]

n_repetitions = 5 if PRIVATE else 2

total_results = []
total_answers = []

for i in tqdm(range(len(df))):
    id_ = df['id'].loc[i]
    problem = df['problem'].loc[i]
    
    results = []
    answers = []
    
    for i in tqdm(range(n_repetitions)):
        try:
            tool_instruction = tool_instructions[i]
            
            messages = [
                {
                    "role": "user", 
                    "content": problem + tool_instruction
                }
            ]
    
            query_prompt = tokenizer.apply_chat_template(
                messages,
                tokenize=False
            )
            
            raw_output = pipeline(
                query_prompt, 
                max_new_tokens=2048, 
                do_sample=True, 
                temperature=0.7,
                return_full_text=False
            )
            raw_output = raw_output[0]['generated_text']

            result_output, code_output = process_output(raw_output)

            torch.cuda.empty_cache()
            gc.collect()

        except Exception as e:
            print(e)
            result_output, code_output = -1, -1
        
        results.append(result_output)
        answers.append(code_output)
    
    total_results.append(results)
    total_answers.append(answers)

  0%|          | 0/10 [00:00<?, ?it/s]
  0%|          | 0/2 [00:00<?, ?it/s][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


504

CODE RESULTS 504
BOXED ['504']
BOXED 504



 50%|█████     | 1/2 [02:01<02:01, 121.69s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Traceback (most recent call last):
  File "/kaggle/working/code.py", line 37, in <module>
    result = sum_of_squares_distances()
  File "/kaggle/working/code.py", line 16, in sum_of_squares_distances
    solutions = solve(parabola - line, x)
UnboundLocalError: local variable 'solve' referenced before assignment


CODE RESULTS -1
BOXED ['13']
BOXED 13



100%|██████████| 2/2 [03:50<00:00, 115.38s/it]
 10%|█         | 1/10 [03:50<34:36, 230.76s/it]
  0%|          | 0/2 [00:00<?, ?it/s][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


list index out of range
ERROR PARSING
BOXED []
BOXED 500



 50%|█████     | 1/2 [00:26<00:26, 26.46s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


list index out of range
ERROR PARSING
BOXED ['0']
BOXED 0



100%|██████████| 2/2 [02:12<00:00, 66.20s/it]
 20%|██        | 2/10 [06:03<23:03, 172.91s/it]
  0%|          | 0/2 [00:00<?, ?it/s][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Traceback (most recent call last):
  File "/kaggle/working/code.py", line 17, in <module>
    result = sparkle(36)
  File "/kaggle/working/code.py", line 10, in sparkle
    num = sum(int(digit) for digit in str(num))
ValueError: Exceeds the limit (4300) for integer string conversion; use sys.set_int_max_str_digits() to increase the limit


CODE RESULTS -1
BOXED ['415}$ special numbers with at most 36 digits. The answer is $\\boxed{415']
BOXED 415}$ special numbers with at most 36 digits. The answer is $\boxed{415
unmatched '}' (<string>, line 1)
ERROR PARSING



 50%|█████     | 1/2 [00:55<00:55, 55.25s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Traceback (most recent call last):
  File "/kaggle/working/code.py", line 24, in <module>
    result = special_numbers()
  File "/kaggle/working/code.py", line 19, in special_numbers
    if is_special(i):
  File "/kaggle/working/code.py", line 14, in is_special
    n = sparkle(n)
  File "/kaggle/working/code.py", line 8, in sparkle
    return factorial(sum(int(digit) for digit in str(n)))
  File "/opt/conda/lib/python3.10/site-packages/sympy/core/_print_helpers.py", line 29, in __str__
    return sstr(self, order=None)
  File "/opt/conda/lib/python3.10/site-packages/sympy/printing/printer.py", line 372, in __call__
    return self.__wrapped__(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/sympy/printing/str.py", line 998, in sstr
    s = p.doprint(expr)
  File "/opt/conda/lib/python3.10/site-packages/sympy/printing/printer.py", line 292, in doprint

CODE RESULTS -1
BOXED ['156']
BOXED 156



100%|██████████| 2/2 [01:53<00:00, 56.84s/it]
 30%|███       | 3/10 [07:56<17:01, 145.86s/it]
  0%|          | 0/2 [00:00<?, ?it/s][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Traceback (most recent call last):
  File "/kaggle/working/code.py", line 22, in <module>
    result = min_value()
  File "/kaggle/working/code.py", line 12, in min_value
    y_sol = solve(constraint, y)[0]
  File "/opt/conda/lib/python3.10/site-packages/sympy/solvers/solvers.py", line 1007, in solve
    raise NotImplementedError('solving %s when the argument '
NotImplementedError: solving Abs(x - 2*y) when the argument is not real or imaginary.


CODE RESULTS -1
BOXED ['560']
BOXED 560



 50%|█████     | 1/2 [01:13<01:13, 73.38s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


800.0

CODE RESULTS 800
BOXED []
BOXED 375



100%|██████████| 2/2 [02:08<00:00, 64.48s/it]
 40%|████      | 4/10 [10:05<13:55, 139.19s/it]
  0%|          | 0/2 [00:00<?, ?it/s][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


310

CODE RESULTS 310
BOXED []
BOXED 630



 50%|█████     | 1/2 [01:19<01:19, 79.02s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


496

CODE RESULTS 496
BOXED ['546']
BOXED 546



100%|██████████| 2/2 [02:41<00:00, 80.88s/it]
 50%|█████     | 5/10 [12:47<12:16, 147.33s/it]
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


100

CODE RESULTS 100
BOXED ['1']
BOXED 1



 50%|█████     | 1/2 [01:22<01:22, 82.30s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Traceback (most recent call last):
  File "/kaggle/working/code.py", line 17, in <module>
    result = count_solutions()
  File "/kaggle/working/code.py", line 11, in count_solutions
    solutions = solve(equation, x)
  File "/opt/conda/lib/python3.10/site-packages/sympy/solvers/solvers.py", line 1007, in solve
    raise NotImplementedError('solving %s when the argument '
NotImplementedError: solving Abs(x - 1) when the argument is not real or imaginary.


CODE RESULTS -1
BOXED ['1']
BOXED 1



100%|██████████| 2/2 [02:09<00:00, 64.60s/it]
 60%|██████    | 6/10 [14:56<09:24, 141.17s/it]
  0%|          | 0/2 [00:00<?, ?it/s][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


list index out of range
ERROR PARSING
BOXED ['967']
BOXED 967



 50%|█████     | 1/2 [00:58<00:58, 58.34s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


list index out of range
ERROR PARSING
BOXED ['967']
BOXED 967



100%|██████████| 2/2 [02:41<00:00, 80.93s/it]
 70%|███████   | 7/10 [17:38<07:23, 147.93s/it]
  0%|          | 0/2 [00:00<?, ?it/s][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


256

CODE RESULTS 256
BOXED ['256']
BOXED 256



 50%|█████     | 1/2 [01:06<01:06, 66.98s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


256

CODE RESULTS 256
BOXED ['256']
BOXED 256



100%|██████████| 2/2 [02:38<00:00, 79.45s/it]
 80%|████████  | 8/10 [20:17<05:02, 151.42s/it]
  0%|          | 0/2 [00:00<?, ?it/s][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


436

CODE RESULTS 436
BOXED ['47']
BOXED 47



 50%|█████     | 1/2 [00:53<00:53, 53.18s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


list index out of range
ERROR PARSING
BOXED ['479']
BOXED 479



100%|██████████| 2/2 [01:47<00:00, 53.54s/it]
 90%|█████████ | 9/10 [22:04<02:17, 137.56s/it]
  0%|          | 0/2 [00:00<?, ?it/s][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Traceback (most recent call last):
  File "/kaggle/working/code.py", line 29, in <module>
    result = calculate_f_of_100()
  File "/kaggle/working/code.py", line 10, in calculate_f_of_100
    if f(f(f_n_1), 1) == 8 * 1 - 7:
  File "/kaggle/working/code.py", line 27, in f
    return f(f(f(n)))
  File "/kaggle/working/code.py", line 27, in f
    return f(f(f(n)))
  File "/kaggle/working/code.py", line 27, in f
    return f(f(f(n)))
  [Previous line repeated 994 more times]
  File "/kaggle/working/code.py", line 22, in f
    if n == 1:
RecursionError: maximum recursion depth exceeded in comparison


CODE RESULTS -1
BOXED []
BOXED 100



 50%|█████     | 1/2 [05:45<05:45, 345.98s/it][ASetting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


751.000000000000

CODE RESULTS 751
BOXED ['911']
BOXED 911



100%|██████████| 2/2 [08:43<00:00, 261.94s/it]
100%|██████████| 10/10 [30:48<00:00, 184.85s/it]


In [14]:
import numpy as np
from collections import Counter

final_answers = []

for a, b in zip(total_answers, total_results):
    a = np.array(a)
    b = np.array(b)
    a[a < 0] = b[a < 0]
    
    pred = Counter(a.tolist()).most_common(2)

    ans = pred[0][0] if not pred[0][0] < 0 else pred[1][0]

    final_answers.append(ans)
    print(ans)


504
500
156
560
310
100
967
256
436
100


In [15]:
df['answer'] = final_answers

In [16]:
df

Unnamed: 0,id,problem,answer
0,229ee8,"Let $k, l > 0$ be parameters. The parabola $y ...",504
1,246d26,Each of the three-digits numbers $111$ to $999...,500
2,2fc4ad,Let the `sparkle' operation on positive intege...,156
3,430b63,What is the minimum value of $5x^2+5y^2-8xy$ w...,560
4,5277ed,There exists a unique increasing geometric seq...,310
5,739bc9,For how many positive integers $m$ does the eq...,100
6,82e2a0,Suppose that we roll four 6-sided fair dice wi...,967
7,8ee6f3,"The points $\left(x, y\right)$ satisfying $((\...",256
8,bedda4,Let $ABCD$ be a unit square. Let $P$ be the po...,436
9,d7e9c9,A function $f: \mathbb N \to \mathbb N$ satisf...,100


In [17]:
df[['id','answer']].to_csv("submission.csv", header=True, index=False)

In [18]:
df[['id','answer']].head()

Unnamed: 0,id,answer
0,229ee8,504
1,246d26,500
2,2fc4ad,156
3,430b63,560
4,5277ed,310


In [19]:
if not PRIVATE:
    df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
    df['model_answer'] = final_answers
    df['match'] = df.answer == df.model_answer
    print(f'{df.match.sum()} matches in {len(df)} examples')

0 matches in 10 examples
