# Thank you to https://www.kaggle.com/code/quan0095/more-diversity-in-output-improve-score
# Rho-1 model [Rho-1: Not All Tokens Are What You Need](https://github.com/microsoft/rho?tab=readme-ov-file)


In [None]:
# credits:
# https://www.kaggle.com/code/olyatsimboy/aimo-openmath-mistral-baseline
# https://www.kaggle.com/code/aatiffraz/prompt-prediction-w-mixtral-mistral7b-gemma-llama
# https://www.kaggle.com/code/thedrcat/aimo-mixtral-baseline

# Zero-shot MMOS-DeepSeekMath-7B with self-consistency and generated code reasoning evaluation

Self-consistency replaces standard greedy decoding in reasoning pipelines: several diverse answers are sampled and then aggregated, e.g., by taking the most common answer ([SC-CoT paper](https://arxiv.org/pdf/2203.11171.pdf)).

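In this kernel, we consider the RL-tuned MMOS-DeepSeekMath-7B backbone; in my experiments, this model produces more consistent code reasoning, and executing the generated code block lets us reduce arithmetic hallucinations. (Note: the loading cell below points `MODEL_PATH` at `rho-math-7b-interpreter-v0.1`, so that is the checkpoint this notebook actually runs.)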

In [None]:
# Install bitsandbytes 0.42.0 from a wheel bundled as a Kaggle input dataset
# (a local file path, not PyPI). `-qq` keeps pip's output quiet.
!pip install -U /kaggle/input/bitsandbytes-0-42-0-py3-none-any-whl/bitsandbytes-0.42.0-py3-none-any.whl -qq


In [None]:
import torch
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig, 
    AutoConfig,
    set_seed
)

# Fix the seed so sampled generations are repeatable between runs.
set_seed(42)

# Local checkpoint directory (Kaggle input dataset) for the model and tokenizer.
MODEL_PATH = "/kaggle/input/microsoftrho-math-7b-interpreter-v0-1/rho-math-7b-interpreter-v0.1"

# 4-bit NF4 quantization settings with bfloat16 compute and double quantization.
# NOTE: this object is currently unused -- the `quantization_config=` argument
# in the `from_pretrained` call below is commented out.
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Load the checkpoint's config and switch on the gradient-checkpointing flag.
# NOTE(review): this notebook only runs generation; presumably the flag has no
# effect here -- confirm before relying on it.
config = AutoConfig.from_pretrained(MODEL_PATH)
config.gradient_checkpointing = True


tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Load the causal LM with automatic device placement and the dtype chosen
# automatically ("auto"); the modified config above is passed through.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,
#     quantization_config=quantization_config,
    config=config
)

In [None]:
# Display the dtype the model was loaded with (shown as the cell output).
model.dtype

In [None]:
import pandas as pd
from tqdm import tqdm
# Assume a private (scoring) run; a later cell flips this to False when the
# loaded test set has fewer than 5 rows.
PRIVATE = True

# Load the competition test set and preview the first rows as the cell output.
df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/test.csv')
df.head()

In [None]:
# Fewer than 5 test rows: switch to the train set and mark the run as
# non-private. NOTE(review): presumably the tiny file is a public placeholder
# test set -- confirm.
if len(df) < 5:
    df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
    PRIVATE = False
df.head()

In [None]:
import gc
# Target device name.
# NOTE(review): `device` is not referenced anywhere else in the visible
# notebook -- confirm before removing.
device = 'cuda'

In [None]:
def naive_parse(answer):
    """Return the last contiguous run of ASCII digits in *answer*.

    The text is scanned from the end: trailing non-digit characters are
    skipped, then digits are taken until the first non-digit is met.
    Returns an empty string when *answer* contains no digit at all.
    """
    digits = '0123456789'

    # Skip everything after the final digit.
    stop = len(answer)
    while stop > 0 and answer[stop - 1] not in digits:
        stop -= 1

    # Extend leftwards over the digit run that ends at `stop`.
    begin = stop
    while begin > 0 and answer[begin - 1] in digits:
        begin -= 1

    return ''.join(answer[begin:stop])

In [None]:
import transformers

# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# NOTE(review): `model` is passed as an object that was loaded earlier with
# its own `device_map`/`torch_dtype`; presumably the `torch_dtype` and
# `device_map` arguments here are redundant -- confirm against the installed
# transformers version.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype='auto',
    device_map="cuda",
)

In [None]:
# Log the installed transformers version for reproducibility.
print(f"Transformers Version: {transformers.__version__}")

In [None]:
import torch

# Turn off the memory-efficient scaled-dot-product-attention backend.
# NOTE(review): presumably a workaround for a kernel problem on the Kaggle
# GPUs -- confirm whether it is still required.
torch.backends.cuda.enable_mem_efficient_sdp(False)

In [None]:
import re
import sys
import subprocess


def process_output(output):
    """Extract two candidate answers from one model generation.

    Two independent extraction paths are attempted:

    * **Code path** -- the text between the first pair of ``` fences (minus a
      7-character ``python\\n`` prefix) is written to ``code.py`` in the
      current directory and run with the current interpreter under a
      7-second timeout. Its stdout is evaluated as a number.
    * **Text path** -- the last ``\\boxed{...}`` expression in the text is
      evaluated; if there is none, the last run of digits found by
      ``naive_parse`` is used instead.

    Args:
        output: Raw text generated by the model.

    Returns:
        ``(result_output, code_output)``: the text-path and code-path
        answers, each rounded and reduced modulo 1000, or ``-1`` when the
        respective path failed.
    """
    try:
        # Assumes the fence opens with "```python\n"; [7:] drops "python\n".
        code = output.split('```')[1][7:]

        with open('code.py', 'w') as fout:
            fout.write(code)

        try:
            # Argument list + `timeout=` instead of a shell string: no shell
            # quoting issues and no dependency on the external `timeout` tool.
            shell_output = subprocess.check_output(
                [sys.executable, 'code.py'], timeout=7
            ).decode('utf8')
            print(shell_output)
            # SECURITY: eval() of program output that was itself produced by
            # model-generated code. Acceptable only in a sandboxed notebook.
            code_output = round(float(eval(shell_output))) % 1000
        except Exception:
            # Non-zero exit, timeout, or unparsable stdout: no code answer.
            code_output = -1

        print('CODE RESULTS', code_output)

    except Exception as e:
        print(e)
        print('ERROR PARSING')
        code_output = -1

    try:
        # Non-greedy capture so several \boxed{} on one line are matched
        # separately instead of being merged into one unparsable string.
        result_output = re.findall(r'\\boxed\{(.*?)\}', output)

        print('BOXED', result_output)
        if not result_output:
            result_output = naive_parse(output)
        else:
            result_output = result_output[-1]

        print('BOXED', result_output)
        if not result_output:
            result_output = -1
        else:
            # SECURITY: eval() on model-generated text; see note above.
            result_output = round(float(eval(result_output))) % 1000

    except Exception as e:
        print(e)
        print('ERROR PARSING')
        result_output = -1

    return result_output, code_output

In [None]:
def toMin(s   ):
    """Rewrite the first "H hours and M minutes" in *s* as total minutes.

    Only the first occurrence of ``'hours and'`` is inspected. The rewrite
    applies when ``'minutes'`` starts 12 or 13 characters after it (a one-
    or two-digit minute count) and the (up to) three characters before
    ``'hours and'`` parse as an integer hour count, e.g.
    ``"It takes 2 hours and 30 minutes"`` -> ``"It takes 150 minutes"``.

    Args:
        s: Problem text.

    Returns:
        The rewritten text, or *s* unchanged when the pattern is absent or
        its numbers cannot be parsed (e.g. "a few hours and 20 minutes").
    """
    marker = 'hours and'
    if marker not in s:
        return s

    hours_at = s.index(marker)
    minutes_at = s.find('minutes', hours_at)
    # 'hours and ' is 10 chars, then 1-2 digits and a space before 'minutes'.
    # A missing 'minutes' gives -1 here, which also fails this check.
    if minutes_at - hours_at not in (12, 13):
        return s

    # Clamp so a phrase at the very start of the text does not turn into a
    # negative (wrap-around) slice index.
    hours_from = max(hours_at - 3, 0)
    try:
        hours = int(s[hours_from:hours_at])
        minutes = int(s[hours_at + 10:hours_at + 12])
    except ValueError:
        # Non-numeric neighbours: leave the text alone instead of crashing.
        return s

    return s[0:hours_from] + ' ' + str(hours * 60 + minutes) + ' ' + s[minutes_at:]
def toMin_Many(s   ):
    """Apply ``toMin`` repeatedly until the text stops changing.

    Returns the fixed point, i.e. the first result that ``toMin`` leaves
    unmodified.
    """
    previous = s
    current = toMin(previous)
    while current != previous:
        previous, current = current, toMin(current)
    return current

# Running the model

In [None]:
import re
from collections import defaultdict

def tool_instruction(problem):
    """Build the chain-of-thought prompt for one problem statement.

    The prompt starts with ``Problem: <problem>``, followed by a
    "Chain of Thought" section listing the reasoning steps and the answer
    format rules (non-negative, modulo 1000, inside ``\\boxed{}``). The
    returned text ends with a newline.
    """
    reasoning_steps = [
        "Understanding the Problem: Clearly describe what the problem is asking. This involves identifying key quantities and operations needed to solve the problem.",
        "Devising a Plan: Determine the steps required to solve the problem. This may involve formulas, identifying patterns, or setting up equations.",
        "Executing the Plan: Carry out the steps outlined in the plan. This part should include calculations, iterations, or any logical reasoning required to get to the solution.",
        "Checking the Work: Verify the solution for correctness and consistency with the problem requirements.",
        "Formatting the Final Answer: Present the final answer in the required format. According to the instructions given:",
    ]
    answer_rules = [
        "The answer should be a non-negative number.",
        "Apply modulo 1000 operation to ensure the result is within the correct range.",
        "Put your final answer within \\boxed{}",
    ]
    prompt_lines = [
        f"Problem: {problem}",
        "",
        "Chain of Thought:",
        "",
        *reasoning_steps,
        *answer_rules,
    ]
    # The trailing newline matches the original template's final line break.
    return "\n".join(prompt_lines) + "\n"
# tool_instruction += '\nPlease integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.'
# tool_instruction = open("/kaggle/input/aimo-cot-prompt/AIMO_prompt.txt").read()


# Number of sampled generations per problem: more samples on the private
# (scoring) run, fewer for the quick local check.
n_repetitions = 5 if PRIVATE else 2

# One inner list per problem, one entry per sample:
#   total_results -> answers parsed from the generated text (\boxed / digits)
#   total_answers -> answers obtained by executing the generated code
total_results = []
total_answers = []

for problem in tqdm(df['problem']):
    # Normalise "H hours and M minutes" phrases to plain minutes.
    problem = toMin_Many(problem)
    messages = [
        {
            "role": "user",
            "content": tool_instruction(problem),
        }
    ]

    query_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False
    )

    results = []
    answers = []

    for _ in tqdm(range(n_repetitions)):
        try:
            raw_output = pipeline(
                query_prompt,
                max_new_tokens=2048,
                do_sample=True,
                temperature=0.9,
                return_full_text=False
            )
            raw_output = raw_output[0]['generated_text']

            result_output, code_output = process_output(raw_output)
            print("="*10)
            print("Q:" + query_prompt)
            print("R:" + raw_output)
            # code_output is an int; concatenating it to a str raised
            # TypeError on every sample, which the handler below then turned
            # into (-1, -1) and so discarded every parsed answer.
            print("C:" + str(code_output))
            print()
            print()
            print("="*10)

        except Exception as e:
            # Best effort: a failed sample counts as "no answer" so the
            # remaining samples and problems are still processed.
            print(e)
            result_output, code_output = -1, -1

        finally:
            # Free GPU memory after every sample, including failed ones.
            torch.cuda.empty_cache()
            gc.collect()

        results.append(result_output)
        answers.append(code_output)

    total_results.append(results)
    total_answers.append(answers)

In [None]:
import numpy as np
from collections import Counter

# Majority vote per problem (self-consistency aggregation).
final_answers = []

for code_answers, text_answers in zip(total_answers, total_results):
    code_answers = np.array(code_answers)
    text_answers = np.array(text_answers)

    # Prefer the code-execution answer; where it failed (negative sentinel),
    # fall back to the answer parsed from the generated text.
    failed = code_answers < 0
    code_answers[failed] = text_answers[failed]

    # Vote only over valid (non-negative) answers. Dropping the sentinel
    # keeps the relative order of the remaining votes, so ties resolve as
    # before, and it avoids the IndexError the old `pred[1][0]` lookup
    # raised when every sample for a problem had failed.
    votes = Counter(value for value in code_answers.tolist() if value >= 0)

    # No usable sample at all: submit 0 as a valid non-negative default.
    ans = votes.most_common(1)[0][0] if votes else 0

    final_answers.append(ans)
    print(ans)

In [None]:
# Store the voted answers in the `answer` column. When `df` holds the train
# set (non-private run) this overwrites the answers loaded from train.csv;
# the evaluation cell at the end re-reads that file.
df['answer'] = final_answers

In [None]:
# Display the frame with the predicted answers (cell output).
df

In [None]:
# Write the submission file with only the `id` and `answer` columns, with a
# header row and no index column.
df[['id','answer']].to_csv("submission.csv", header=True, index=False)

In [None]:
# Preview the first submitted rows (cell output).
df[['id','answer']].head()

In [None]:
# Local evaluation, only on the non-private run: reload train.csv (its
# `answer` column in `df` was overwritten with predictions above) and count
# the exact matches between the loaded answers and the model's answers.
if not PRIVATE:
    df = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
    df['model_answer'] = final_answers
    df['match'] = df.answer == df.model_answer
    print(f'{df.match.sum()} matches in {len(df)} examples')