In [1]:
# credits: https://www.kaggle.com/code/thedrcat/aimo-mixtral-baseline
# credits: https://www.kaggle.com/code/aatiffraz/prompt-prediction-w-mixtral-mistral7b-gemma-llama

In [2]:
!pip install -U /kaggle/input/bitsandbytes-0-42-0-py3-none-any-whl/bitsandbytes-0.42.0-py3-none-any.whl -qq

In [3]:
PRIVATE = False

In [4]:
import re
from tqdm import tqdm

import numpy as np
import pandas as pd
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig

In [5]:
# Evaluation environment
import aimo

PRIVATE = True

if PRIVATE:
    env = aimo.make_env()
else:
    df_train = pd.read_csv('/kaggle/input/ai-mathematical-olympiad-prize/train.csv')
    class Env:
        def __init__(self, df_train):
            self.df_train = df_train
            self.df_train.set_index('id', inplace=True)
            self.df_results = df_train.copy()
        def predict(self, x):
            self.df_results.loc[x.name, 'answer'] = x['answer']
        def iter_test(self):
            for i in range(df_train.shape[0]):
                yield self.df_train.iloc[i], self.df_results.iloc[i]
    env = Env(df_train)
    

In [6]:
MODEL_PATH = "/kaggle/input/mixtral/pytorch/8x7b-instruct-v0.1-hf/1"

quantization_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# To prevent GPU memory overflow in Mixtral8x7b
config = AutoConfig.from_pretrained(MODEL_PATH)
config.gradient_checkpointing = True


tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map = "auto",
    trust_remote_code = True,
    quantization_config=quantization_config,
    config=config
)

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

In [7]:
def gen_prompt(problem):
    
    return f"""
Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n
### Instruction:\n{problem}\n\n
### Response: Let's think step by step. The final response should be a single number in the last line of your response.
"""

additional_prompt = "So the answer is (final integer value):"

In [8]:
import gc
device = 'cuda'

In [9]:
# Function to parse textual answer (integer at the end, possibly in a sentence
pattern_answer = re.compile(r'(\d+)(\$|\.|\*)*$')
def try_parse_answer(answer):
    answer = answer.strip().split('\n')[-1].strip().lower()
    if match := re.search(pattern_answer, answer):
        return int(match.group(1))

In [10]:
successes = 0
total = 0

for test, sample_submission in env.iter_test():
    try:
        problem = test['problem']
        if not PRIVATE:
            real_ans = test['answer']
        query_prompt = gen_prompt(problem)
        
        messages = [
            {
                "role": "user",
                "content": query_prompt
            }
        ]

        inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
        with torch.no_grad():
            encoded_output = model.generate(inputs, 
                                            max_length=4096, 
                                            do_sample=True, 
                                            temperature=0.9,
                                            top_p=0.95)
        decoded_output = tokenizer.decode(encoded_output[0, inputs.shape[1]:], skip_special_tokens=True)
        print(decoded_output)
        answer = try_parse_answer(decoded_output)
        
        if answer is None:
            print(additional_prompt)
            messages.extend([
                {
                    "role": "assistant",
                    "content": decoded_output
                },
                {
                    "role": "user",
                    "content": additional_prompt
                }
            ])
            inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
            with torch.no_grad():
                encoded_output = model.generate(inputs,
                                                max_length=4096,
                                                do_sample=True,
                                                temperature=0.9,
                                                top_p=0.95)

            decoded_output = tokenizer.decode(encoded_output[0, inputs.shape[1]:], skip_special_tokens=True)
            print(decoded_output)
            answer = try_parse_answer(decoded_output)
        
        print(answer)
        if answer is not None:
            answer = int(round(answer)) % 1000
            print(answer)
        torch.cuda.empty_cache()
        gc.collect()
    except Exception as e:
        print(e)
        answer = 0
    sample_submission["answer"] = answer
    env.predict(sample_submission)
    if not PRIVATE:
        if answer == real_ans:
            print("Success!")
            successes += 1
        else:
            print(f"Failure! Real answer={real_ans}")
        total += 1
        
if not PRIVATE:
    print(f"Accuracy: {successes/total}")

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
2024-05-15 22:02:59.923079: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-15 22:02:59.923346: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-15 22:03:00.044039: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Sure, I'd be happy to help with that! The expression you've given is $1 - 1$. To evaluate this, we simply subtract 1 from 1. So:

$1 - 1 = 0$

Therefore, $1 - 1$ is equal to 0.
0
0


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Sure, I'd be happy to help you with that! The expression you've given is $0 \times 10$.

In numerical multiplication, when we multiply any number by 0, the result is always 0. This is a fundamental rule of arithmetic. Therefore, $0 \times 10$ equals 0.
0
0


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Sure, I'd be happy to help you solve for $x$ in the equation $4 + x = 4$. Here's how we can solve it:

1. The first step is to isolate $x$ on one side of the equation. To do this, we can subtract 4 from both sides of the equation. This gives us:

$4 - 4 + x = 4 - 4$

2. Simplifying both sides of the equation, we get:

$0 + x = 0$

3. Finally, we can simplify this equation to:

$x = 0$

Therefore, the solution to the equation $4 + x = 4$ is $x = 0$.
0
0
