In [1]:
# credits:
# https://www.kaggle.com/code/olyatsimboy/aimo-openmath-mistral-baseline
# https://www.kaggle.com/code/aatiffraz/prompt-prediction-w-mixtral-mistral7b-gemma-llama
# https://www.kaggle.com/code/thedrcat/aimo-mixtral-baseline

# Zero-shot MMOS-DeepSeekMath-7B with self-consistency and generated code reasoning evaluation

Self-consistency is a modification of the standard greedy decoding in reasoning pipelines via sampling several diverse answers followed by aggregation, e.g., most common answer ([SC-CoT paper](https://arxiv.org/pdf/2203.11171.pdf)).

In this kernel, we will consider MMOS-DeepSeekMath-7B RL-tuned backbone; in my experiments, this model produces more consistent code reasoning and the code block execution will allow us to decrease arithmetic hallucinations.

In [2]:
!pip install -U /kaggle/input/bitsandbytes-0-42-0-py3-none-any-whl/bitsandbytes-0.42.0-py3-none-any.whl -qq

In [3]:
import torch
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig, 
    AutoConfig,
    set_seed
)

set_seed(42)

MODEL_PATH = "/kaggle/input/deepseek-math"

quantization_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

config = AutoConfig.from_pretrained(MODEL_PATH)
config.gradient_checkpointing = True


tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
#     quantization_config=quantization_config,
    config=config
)

2024-04-30 18:26:18.335974: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-30 18:26:18.336078: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-30 18:26:18.460163: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
model.dtype

torch.bfloat16

In [5]:
import pandas as pd
from tqdm import tqdm
# PRIVATE = False

# if PRIVATE:
#     df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
# else:
#     df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/test.csv')
# df.head()

In [6]:
def log(text):
    with open('log.txt', 'a') as fout:
        fout.write(str(text))
        fout.write("\n")
    return 0

In [7]:
import gc
device = 'cuda'

In [8]:
def naive_parse(answer):
    out = []
    start = False
    end = False
    for l in reversed(list(answer)):
        if l in '0123456789' and not end:
            start = True
            out.append(l)
        else:
            if start:
                end = True
        
    out = reversed(out)
    return ''.join(out)

In [9]:
import transformers

pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype='auto',
    device_map="auto",
)

In [10]:
print(f"Transformers Version: {transformers.__version__}")

Transformers Version: 4.38.2


In [11]:
import torch

torch.backends.cuda.enable_mem_efficient_sdp(False)

In [12]:
import re
import sys
import subprocess


def process_output(output):
    result = output
    
    try:
        code = output.split('```')[1][7:]
#         print("Code", code)

        with open('code.py', 'w') as fout:
            fout.write(code)

        batcmd = 'timeout 7 ' + sys.executable + ' code.py'
        try:
            shell_output = subprocess.check_output(batcmd, shell=True).decode('utf8')
#             print(shell_output)
            code_output = round(float(eval(shell_output))) % 1000
        except:
            code_output = -1

#         print('CODE RESULTS', code_output)
    
    except Exception as e:
        log(e)
        log('ERROR PARSING')
        code_output = -1
    
    try:
        result_output = re.findall(r'\\boxed\{(.*)\}', result)

        log('BOXED' + str(result_output))
        if not len(result_output):
            result_output = naive_parse(result)
        else:
            result_output = result_output[-1]

        log('BOXED' + str(result_output))
        if not len(result_output):
            result_output = -1
        
        else:
            result_output = round(float(eval(result_output))) % 1000
    
    except Exception as e:
        log(e)
        log('ERROR PARSING')
        result_output = -1
    
    return result_output, code_output

In [13]:
import re
from collections import defaultdict

def get_principles(problem):
#     id_ = df['id'].loc[idx]
#     problem = df['problem'].loc[idx]

    agent_instruction = "You are an expert at Mathematics. \
You are given a Mathematics problem. Your task is to extract the \
Mathematics concepts and principles involved in solving \
the problem. You need to provide only the related concepts, and \
not the answer to the question. Here are some examples: \
Question: <What is the value of 5!?> \
Principles: <Factorial of n is the multiplication of all natural numbers from 1 to n.> \
Question: <A circle has radius r. Its radius is decreased by a factor of 2. By what factor does the area decrease?> \
Principles: <Area of a circles is pi times r squared> \
Give the principles for the following question in brief: \n"

    agent_format = "\nYour response should be in the following format: \"The principles are: <your response>\""

    messages = [
            {
                "role": "user", 
                "content": agent_instruction + problem + agent_format
            }
        ]

    log(messages[0]["content"])

    query_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False
    )

    try: 
        raw_output = pipeline(
                query_prompt, 
                max_new_tokens=2048, 
                do_sample=True, 
                temperature=0.7,
                return_full_text=False
            )
        raw_output = raw_output[0]['generated_text']
        log("Raw output" + str(raw_output))
        
        principles = ""
        temp = raw_output.split("The principles are")
        if len(temp):
            temp = temp[-1]
            idx = temp.find(".")
            if idx != -1:
                principles = temp[:idx+1]
            else:
                principles = temp
        else:
            principles = "Unavailable"

        torch.cuda.empty_cache()
        gc.collect()
        
        return principles

    except Exception as e:
        log(e)
        return "Unavailable"

In [14]:
import re
from collections import defaultdict

agent_instruction = "You are an expert at Mathematics. You are given a \
Mathematics problem and a set of principles involved in \
solving the problem. Solve the problem step by step by following the \
principles.\n"

tool_instruction = " The answer should be given as a non-negative modulo 1000."
tool_instruction += '\nPlease integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.'

temperature = 0.8964

total_results = []
total_answers = []


def predict(problem):

#     log("Hi1" + problem)
#     return 0
    
    principles = get_principles(problem)
    

    messages = [
        {
            "role": "user", 
            "content": agent_instruction + "Question : " + problem + "\n" + "Principles: " + principles + "\n" + tool_instruction
        }
    ]
    
    log(messages[0]["content"])
    
    query_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False
    )
     
    
    try:
        raw_output = pipeline(
            query_prompt, 
            max_new_tokens=2048, 
            do_sample=True, 
            temperature=temperature,
            return_full_text=False
        )
        raw_output = raw_output[0]['generated_text']
        log("Raw output" + str(raw_output))
        result_output, code_output = process_output(raw_output)

        torch.cuda.empty_cache()
        gc.collect()

    except Exception as e:
        log(e)
        result_output, code_output = -1, -1

    total_results.append(result_output)
    total_answers.append(code_output)
    
    ans = code_output
    if code_output < 0:
        ans = result_output
    
    return ans
#     break

In [15]:
# import aimo

# env = aimo.make_env()
# iter_test = env.iter_test()
# for test, sample_submission in iter_test:
#     sample_submission['answer'] = predict(test['problem'][0])
#     env.predict(sample_submission)
#     print(test)
#     print(sample_submission, '\n')

In [16]:
df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
for i in tqdm(range(len(df))):
    ans = predict(df['problem'][i])
    print(ans)

  0%|          | 0/10 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
 10%|█         | 1/10 [01:24<12:40, 84.49s/it]Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


20


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
 20%|██        | 2/10 [01:46<06:20, 47.58s/it]Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


43


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
 30%|███       | 3/10 [02:10<04:18, 36.88s/it]Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


14


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
 40%|████      | 4/10 [02:44<03:35, 35.93s/it]Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


500


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


310


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
Traceback (most recent call last):
  File "/kaggle/working/code.py", line 14, in <module>
    result = count_solutions()
  File "/kaggle/working/code.py", line 9, in count_solutions
    solutions = solve(equation, x)
  File "/opt/conda/lib/python3.10/site-packages/sympy/solvers/solvers.py", line 1007, in solve
    raise NotImplementedError('solving %s when the argument '
NotImplementedError: solving Abs(x - 1) when the argument is not real or imaginary.
 60%|██████    | 6/10 [04:55<03:34, 53.61s/it]Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


1


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
 70%|███████   | 7/10 [05:55<02:46, 55.57s/it]Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


97


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
 80%|████████  | 8/10 [06:24<01:34, 47.03s/it]Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


72


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
 90%|█████████ | 9/10 [08:28<01:11, 71.14s/it]Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.


916


Setting `pad_token_id` to `eos_token_id`:100001 for open-end generation.
100%|██████████| 10/10 [10:24<00:00, 62.41s/it]

127





In [17]:
with open("log.txt", "r") as f:
    print(f.read())

You are an expert at Mathematics. You are given a Mathematics problem. Your task is to extract the Mathematics concepts and principles involved in solving the problem. You need to provide only the related concepts, and not the answer to the question. Here are some examples: Question: <What is the value of 5!?> Principles: <Factorial of n is the multiplication of all natural numbers from 1 to n.> Question: <A circle has radius r. Its radius is decreased by a factor of 2. By what factor does the area decrease?> Principles: <Area of a circles is pi times r squared> Give the principles for the following question in brief: 
Let $k, l > 0$ be parameters. The parabola $y = kx^2 - 2kx + l$ intersects the line $y = 4$ at two points $A$ and $B$. These points are distance 6 apart. What is the sum of the squares of the distances from $A$ and $B$ to the origin?
Your response should be in the following format: "The principles are: <your response>"
Raw outputThe principles are: Parabola, distance bet