In [1]:
import os 
import re

import pandas as pd
from tqdm import tqdm
from IPython.display import Latex

In [2]:
# Root folder for the project's data files; the raw CSVs are read from its "raw/" subfolder.
DATAPATH = "data"

# Data

In [3]:
# Load the raw CSVs; train and test use their "problem_id" column as the index.
train = pd.read_csv(os.path.join(DATAPATH, "raw/train.csv"), index_col="problem_id")
test = pd.read_csv(os.path.join(DATAPATH, "raw/test.csv"), index_col="problem_id")
# NOTE(review): `submission` is reassigned from the test predictions further down
# without this frame being read in between, so this load looks redundant — confirm.
submission = pd.read_csv(os.path.join(DATAPATH, "raw/sample_submission.csv"))

In [4]:
# Show the first training record as a plain dict (only one row is converted).
train.head(1).to_dict("records")[0]

{'problem_text': 'Find the value of the expression $\\dfrac{17}{5} :\\dfrac{34}{3} +1.3$.',
 'answer': '1.6'}

# Query

In [5]:
# Use the first training problem as a worked example: its statement and reference answer.
query, answer = train.iloc[0][["problem_text", "answer"]]

In [6]:
Latex(query)

<IPython.core.display.Latex object>

In [7]:
answer

'1.6'

In [8]:
# Switch the working example to the first test problem.
query = test["problem_text"].iloc[0]

In [9]:
Latex(query)

<IPython.core.display.Latex object>

# LLM

In [10]:
from openai import OpenAI

In [11]:
# OpenAI API client. The key is read from the OPENAI_KEY environment variable;
# a KeyError is raised here if that variable is not set.
client = OpenAI(api_key=os.environ["OPENAI_KEY"])

In [12]:
# Baseline: send the bare problem statement as a single user message, no prompt template.
response = client.chat.completions.create(
    messages=[dict(role="user", content=query)],
    model="gpt-4o-mini",
)

In [13]:
Latex(response.choices[0].message.content)

<IPython.core.display.Latex object>

In [14]:
4.8 * 2.5

12.0

# Prompt

In [15]:
def extract_numerical_answer(text):
    r"""Pull the final numeric answer out of a model response.

    The text is searched for an answer phrase -- "final answer", "final answer is"
    or "the answer is" (case-insensitive) -- followed by an optional colon,
    optional light markup (``$``, ``\(``, ``*``, ``[``, ``\boxed{``) and a number.
    When the phrase occurs several times the LAST occurrence wins, because the
    concluding statement of a response comes after any intermediate or quoted ones.
    If no phrase is found, the last number anywhere in the text is used.

    Args:
        text: Model response text. ``None`` or an empty string is treated as a
            response that contains no number.

    Returns:
        float: The extracted number, or ``1.0`` when the text contains no number.
    """
    if not text:
        return 1.0

    number = r'[+-]?\d*\.?\d+'
    # "final answer(?:\s+is)?" covers "Hence, the final answer is X", which the
    # stage-2 prompt asks the model to end with.
    answer_pattern = (
        r'(?:final answer(?:\s+is)?|the answer is)'
        r'[:\s]*'
        r'(?:\\boxed\{|\\\(|[$*\[])*'
        r'\s*'
        r'(' + number + r')'
    )

    stated = re.findall(answer_pattern, text, re.IGNORECASE)
    if stated:
        return float(stated[-1])

    # No explicit answer phrase: fall back to the last number in the text.
    numbers = re.findall(number, text)
    return float(numbers[-1]) if numbers else 1.0

In [16]:
def build_prompt_stage_1(query):
    """Build the stage-1 (solve) prompt for one problem.

    The prompt is a fixed role/instruction preamble with a Problem/Solution/Answer
    skeleton, followed by the problem text and a closing cue line.

    Args:
        query: Problem statement; inserted verbatim (it may contain LaTeX).

    Returns:
        str: The complete prompt text.
    """
    preamble = """Role:
You are an advanced AI system with exceptional mathematical reasoning and problem-solving capabilities, specifically designed to solve tricky math problems (whose answer is a non-negative integer) written in LaTeX format from the AI Mathematical Olympiad (AIMO) competition. Your task is to accurately analyze and solve intricate mathematical problems, demonstrating a deep understanding of mathematical concepts and a strong ability to apply logical reasoning strategies.

Instruction:
1. Carefully read and comprehend the problem statement provided in the "Problem" section.
2. In the "Solution" section, provide a solution of the problem with detailed explanation of your logical reasoning process. Keep in mind that answer must be a non-negative integer number.
3. At the end, create a "Answer" section where you will state only the final numerical or algebraic answer, without any additional text or narrative.

Problem:
...

Solution:
...

Answer:
...

"""
    closing_cue = "\n\nStep-by-step solution and final answer:"
    return f"{preamble}{query}{closing_cue}"

In [17]:
# Build the stage-1 (solve) prompt for the current `query`.
prompt_stage_1 = build_prompt_stage_1(query)

In [18]:
Latex(prompt_stage_1)

<IPython.core.display.Latex object>

In [21]:
# Stage 1: ask the model to solve the problem using the templated prompt.
response = client.chat.completions.create(
    messages=[dict(role="user", content=prompt_stage_1)],
    model="gpt-4o-mini",
)

In [22]:
# Text of the first choice in `response`.
# NOTE: this rebinds `answer`, which an earlier cell set to the first training row's reference answer.
answer = response.choices[0].message.content

In [23]:
Latex(answer)

<IPython.core.display.Latex object>

In [26]:
def build_prompt_stage_2(query, solution):
    """Build the stage-2 (review) prompt for a problem and a candidate solution.

    The prompt asks the model to analyze the solution, fix it if needed and end
    with a 'Hence, the final answer is ...' sentence, then lists the problem,
    the solution and an open "Analysis:" section for the model to continue.

    Args:
        query: Problem statement, inserted verbatim.
        solution: Candidate solution text, inserted verbatim.

    Returns:
        str: The complete prompt text.
    """
    # NOTE(review): the quote opened before 'Hence' is never closed in the prompt text.
    instructions = (
        " Please analyze the solution for a given problem, fix it if needed, and then provide the final answer.         \n"
        "Your response should end in the format: 'Hence, the final answer is [numeric string].\n"
        "    \n"
    )
    sections = (
        f"Q: {query}\n"
        "\n"
        f"Solution: {solution}\n"
        "\n"
        "Analysis: \n"
        "\n"
        "    "
    )
    return instructions + sections

In [27]:
# Build the stage-2 (review) prompt from the current `query` and the text currently held in `answer`.
prompt_stage_2 = build_prompt_stage_2(query, answer)

In [28]:
Latex(prompt_stage_2)

<IPython.core.display.Latex object>

In [30]:
# Stage 2: ask the model to review the stage-1 solution and restate the final answer.
response_stage_2 = client.chat.completions.create(
    messages=[dict(role="user", content=prompt_stage_2)],
    model="gpt-4o-mini",
)

In [32]:
# Text of the stage-2 reply; rebinds `answer` once more.
answer = response_stage_2.choices[0].message.content

In [33]:
Latex(answer)

<IPython.core.display.Latex object>

In [34]:
extract_numerical_answer(answer)

12.0

# Rag

In [35]:
def llm(prompt, model='gpt-4o-mini'):
    """Send a single-turn user prompt to the chat model and return the reply text.

    Args:
        prompt: Text sent as the only message, with role "user".
        model: Name of the chat model to call. Defaults to the model used
            everywhere else in this notebook.

    Returns:
        str: Content of the first choice's message, or an empty string when the
        API returns no text content for it.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    content = response.choices[0].message.content
    # The message content is nullable in the API; normalise to "" so downstream
    # string handling (regex extraction) does not fail in the middle of a long run.
    return content if content is not None else ""

In [40]:
def rag(query):
    """Run the two-stage solve-then-review pipeline on one problem.

    Stage 1 asks the model for a solution; stage 2 asks it to check that
    solution and restate the final answer, from which a number is parsed.

    Args:
        query: Problem statement.

    Returns:
        tuple: (parsed numeric answer, stage-1 response text, stage-2 response text).
    """
    draft = llm(build_prompt_stage_1(query))
    review = llm(build_prompt_stage_2(query, draft))
    return extract_numerical_answer(review), draft, review

In [41]:
Latex(query)

<IPython.core.display.Latex object>

In [42]:
# Run the full two-stage pipeline on the current `query`; only the parsed numeric answer is kept.
answer, _, _ = rag(query)

In [None]:
(17/5) / (34/3) + 1.3

In [43]:
answer

12.0

In [78]:
# Run the two-stage pipeline on every training problem, keeping both raw
# responses next to the parsed prediction and the reference answer.
train_result = []

# The progress-bar total must be the number of rows iterated (train, not test).
for _, row in tqdm(train.iterrows(), total=len(train)):
    query = row['problem_text']
    answer, response_1, response_2 = rag(query)
    train_result.append({
        'problem_id': row.name,  # index label of the row, i.e. the problem_id
        'stage_1': response_1,
        'stage_2': response_2,
        'predict': answer,
        'answer': row["answer"]
    })

In [58]:
# One row per processed training problem: problem_id, both stage responses, prediction and reference answer.
train_result_df = pd.DataFrame(train_result)

# Metrics

In [46]:
from sklearn.metrics import accuracy_score

In [47]:
def fix_suffix(value):
    """Drop a trailing '.0' from a string (e.g. '12.0' -> '12'); any other string is returned unchanged."""
    return value[:-2] if value.endswith('.0') else value

In [81]:
# Turn the float predictions into strings and drop a trailing ".0" from each.
train_result_df["predict"] = (
    train_result_df["predict"]
    .astype(str)
    .map(fix_suffix)
)

In [82]:
# Exact-match accuracy on the training problems. The reference answers are taken from
# the results frame itself, so each prediction is scored against the answer recorded
# for the same problem even if only part of `train` was processed.
accuracy_score(train_result_df["answer"], train_result_df["predict"])

0.65

# Test

In [83]:
# Run the two-stage pipeline on every test problem and collect one record per problem.
test_result = []

for _, row in tqdm(test.iterrows(), total=len(test)):
    query = row['problem_text']
    answer, response_1, response_2 = rag(query)
    # row.name is the row's index label, i.e. the problem_id.
    test_result.append(
        dict(problem_id=row.name, stage_1=response_1, stage_2=response_2, predict=answer)
    )

100%|██████████| 100/100 [20:15<00:00, 12.16s/it]


In [87]:
# One row per processed test problem: problem_id, both stage responses and the parsed prediction.
test_result_df = pd.DataFrame(test_result)

In [90]:
test_result_df

Unnamed: 0,problem_id,stage_1,stage_2,predict
0,11919,Solution:\nTo find the value of the expression...,The solution method provided for the multiplic...,12.0
1,8513,To convert the flight altitude from feet to me...,The original solution contains a mistake in th...,11285.0
2,7887,To find the length \( a \) of the rectangular ...,Let's analyze and verify the solution step by ...,4.0
3,5272,To solve the equation \(\left(\frac{1}{8}\righ...,"The original solution appears to be correct, a...",6.0
4,8295,To determine the minimum number of two-person ...,The provided solution accurately follows the l...,13.0
...,...,...,...,...
95,3519,To find a set of tours such that the tourist v...,Let's re-check the analysis and calculations f...,314.0
96,7934,To determine the length of side \( b \) of the...,Let's verify the solution and calculations ste...,12.0
97,9390,To determine the cost of the cheapest purchase...,The provided solution seems to be correctly ca...,217000.0
98,7137,To find the percentage decrease in the number ...,The original solution correctly identify the r...,22.0


In [91]:
# Turn the float predictions into strings and drop a trailing ".0" from each.
test_result_df["predict"] = (
    test_result_df["predict"]
    .astype(str)
    .map(fix_suffix)
)

In [93]:
# Keep only the predictions, named "answer"; reset_index() exposes the positional
# index as an "index" column next to them.
submission = test_result_df["predict"].rename("answer").reset_index()

In [98]:
# Replace the positional "index" column with the test problem ids (assigned by position).
submission = submission.assign(index=test.index)

In [102]:
# Give the id column the name "problem_id".
submission = submission.rename({"index": "problem_id"}, axis="columns")

In [104]:
# to_csv does not create missing directories, so make sure the output folder exists first.
os.makedirs("result", exist_ok=True)
submission.to_csv("result/double-check.csv", index=False)

In [103]:
submission

Unnamed: 0,problem_id,answer
0,11919,12
1,8513,11285
2,7887,4
3,5272,6
4,8295,13
...,...,...
95,3519,314
96,7934,12
97,9390,217000
98,7137,22
