In [1]:
import os 
import re

import pandas as pd
from tqdm import tqdm
from IPython.display import Latex

In [2]:
DATAPATH = "data"

# Data

In [3]:
train = pd.read_csv(os.path.join(DATAPATH, "raw/train.csv"), index_col="problem_id")
test = pd.read_csv(os.path.join(DATAPATH, "raw/test.csv"), index_col="problem_id")
submission = pd.read_csv(os.path.join(DATAPATH, "raw/sample_submission.csv"))

In [4]:
train.to_dict("records")[0]

{'problem_text': 'Find the value of the expression $\\dfrac{17}{5} :\\dfrac{34}{3} +1.3$.',
 'answer': '1.6'}

# Query

In [10]:
query = train.iloc[0]["problem_text"]
answer = train.iloc[0]["answer"]

In [11]:
Latex(query)

<IPython.core.display.Latex object>

In [12]:
answer

'1.6'

In [13]:
query = test.iloc[0]["problem_text"]

In [14]:
Latex(query)

<IPython.core.display.Latex object>

# LLM

In [15]:
from openai import OpenAI

In [16]:
client = OpenAI(api_key=os.environ["OPENAI_KEY"])

In [17]:
response = client.chat.completions.create(
    model='gpt-4o',
    messages=[{"role": "user", "content": query}]
)

In [18]:
Latex(response.choices[0].message.content)

<IPython.core.display.Latex object>

In [19]:
4.8 * 2.5

12.0

# Promt

In [20]:
def build_prompt(query):
    prompt_template = f"""Role:
You are an advanced AI system with exceptional mathematical reasoning and problem-solving capabilities, specifically designed to solve tricky math problems (whose answer is a non-negative integer) written in LaTeX format from the AI Mathematical Olympiad (AIMO) competition. Your task is to accurately analyze and solve intricate mathematical problems, demonstrating a deep understanding of mathematical concepts and a strong ability to apply logical reasoning strategies.

Instruction:
1. Carefully read and comprehend the problem statement provided in the "Problem" section.
2. In the "Solution" section, provide a solution of the problem with detailed explanation of your logical reasoning process. Keep in mind that answer must be a non-negative integer number.
3. At the end, create a "Answer" section where you will state only the final numerical or algebraic answer, without any additional text or narrative.

Problem:
...

Solution:
...

Answer:
...

{query}

Step-by-step solution and final answer:"""
    return prompt_template

In [21]:
promt = build_prompt(query)

In [22]:
Latex(promt)

<IPython.core.display.Latex object>

In [23]:
response = client.chat.completions.create(
    model='gpt-4o',
    messages=[{"role": "user", "content": promt}]
)

In [26]:
answer = response.choices[0].message.content

In [28]:
def extract_numerical_answer(text):
    # Look for patterns like "Final answer: X" or "The answer is X" at the end of the text
    match = re.search(r'(?:final answer|the answer is)[:\s]*([+-]?\d*\.?\d+)', text, re.IGNORECASE)
    if match:
        return float(match.group(1))
    else:
        # If no clear final answer, look for the last number in the text
        numbers = re.findall(r'[+-]?\d*\.?\d+', text)
        return float(numbers[-1]) if numbers else 1.0

In [27]:
Latex(answer)

<IPython.core.display.Latex object>

In [29]:
extract_numerical_answer(answer)

12.0

# Rag

In [51]:
def llm(prompt):
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [52]:
def rag(query):
    prompt = build_prompt(query)
    response = llm(prompt)
    answer = extract_numerical_answer(response)
    return answer, response

In [53]:
Latex(query)

<IPython.core.display.Latex object>

In [54]:
answer, _ = rag(query)

In [None]:
(17/5) / (34/3) + 1.3

In [55]:
answer

2.0

In [56]:
answers = []
responses = []
for row in tqdm(train.to_dict("records")):
    query = row["problem_text"]
    answer, response = rag(query)
    answers.append(answer)
    responses.append(response)

100%|██████████| 100/100 [13:05<00:00,  7.85s/it]


In [86]:
train["predict"] = answers
train["response"] = responses

# Metrics

In [87]:
from sklearn.metrics import accuracy_score

In [95]:
def fix_suffix(value):
    if value.endswith('.0'):
        return value[:-2]
    else:
        return value

In [97]:
train["predict"] = train["predict"].astype(str).apply(fix_suffix)

In [103]:
accuracy_score(train["answer"], train["predict"])

0.63

# Test

In [104]:
answers_test = []
responses_test = []
for row in tqdm(test.to_dict("records")):
    query = row["problem_text"]
    answer, response = rag(query)
    answers_test.append(answer)
    responses_test.append(response)

100%|██████████| 100/100 [11:37<00:00,  6.98s/it]


In [105]:
test["predict"] = answers_test
test["response"] = responses_test

In [106]:
test["predict"] = test["predict"].astype(str).apply(fix_suffix)

In [112]:
submission = test.rename(columns={"predict": "answer"})["answer"].reset_index()

In [113]:
submission

Unnamed: 0,problem_id,answer
0,11919,12
1,8513,11285
2,7887,4
3,5272,6
4,8295,13
...,...,...
95,3519,134
96,7934,12
97,9390,217000
98,7137,22


In [114]:
submission.to_csv("result/baseline.csv", index=False)