In [2]:
import pandas as pd

In [3]:
from langchain_ollama.llms import OllamaLLM
from langchain.prompts import PromptTemplate

llm = OllamaLLM(model="mathstral:7b",temperature=0, max_tokens=1500)

In [4]:
print(llm)

[1mOllamaLLM[0m
Params: {}


In [5]:
def get_answer(question):
    prompt = f"""Role:
    You are an advanced AI system with exceptional mathematical reasoning and problem-solving capabilities, specifically designed to solve tricky math problems (whose answer is a non-negative integer) written in LaTeX format from the AI Mathematical Olympiad (AIMO) competition. Your task is to accurately analyze and solve intricate mathematical problems, demonstrating a deep understanding of mathematical concepts and a strong ability to apply logical reasoning strategies.
    
    Instruction:
    1. Carefully read and comprehend the problem statement provided in the "Problem" section.
    2. In the "Solution" section, provide a solution of the problem with detailed explanation of your logical reasoning process. Keep in mind that answer must be a non-negative integer number.
    3. At the end, create a "Answer" section where you will state only the final numerical or algebraic answer, without any additional text or narrative.
    Problem:
    ...

    Solution:
    ...
    
    Answer:
    ...
    
    {question}
    
    Step-by-step solution and final answer:"""
    
    response = llm.invoke(prompt)
    
    return response.strip()

In [6]:
import re

def extract_numerical_answer(text):
    # Look for patterns like "Final answer: X" or "The answer is X" at the end of the text
    match = re.search(r'(?:final answer|the answer is)[:\s]*([+-]?\d*\.?\d+)', text, re.IGNORECASE)
    if match:
        return float(match.group(1))
    else:
        # If no clear final answer, look for the last number in the text
        numbers = re.findall(r'[+-]?\d*\.?\d+', text)
        return float(numbers[-1]) if numbers else None

In [7]:
# Script for runing faster
from tqdm.auto import tqdm

from concurrent.futures import ThreadPoolExecutor

pool = ThreadPoolExecutor(max_workers=6)

def map_progress(pool, seq, f):
    results = []

    with tqdm(total=len(seq)) as progress:
        futures = []

        for el in seq:
            future = pool.submit(f, el)
            future.add_done_callback(lambda p: progress.update())
            futures.append(future)
            
        for future in futures:
            result = future.result()
            results.append(result)

    return results

In [8]:
def process_row(row):
    problem_id = row['problem_id']
    problem_text = row['problem_text']

    llm_reasoning = get_answer(problem_text)

    numerical_answer = extract_numerical_answer(llm_reasoning)

    return {
        'problem_id': problem_id,
        'problem_text': problem_text,
        'llm_reasoning': llm_reasoning,
        'answer': numerical_answer
    }

In [9]:
df_train = pd.read_csv('train.csv')
df_train.head()

Unnamed: 0,problem_id,problem_text,answer
0,2374,Find the value of the expression $\dfrac{17}{5...,1.6
1,4723,"In a company of 30 people, 25 use the social n...",24.0
2,7135,The number of road traffic accidents (RTAs) in...,32.0
3,5814,Find the value of the expression $\dfrac{2\str...,256.0
4,9237,A traveler from Moscow wants to visit four cit...,53.0


In [10]:
rows = df_train.head().to_dict(orient='records')
process_row(rows[0])

{'problem_id': 2374,
 'problem_text': 'Find the value of the expression $\\dfrac{17}{5} :\\dfrac{34}{3} +1.3$.',
 'llm_reasoning': '1. First, we need to understand that the problem is asking for the value of the expression $\\dfrac{17}{5} :\\dfrac{34}{3} +1.3$. The symbol ":" represents division in this context.\n2. Next, we perform the division operation. To divide fractions, we multiply the first fraction by the reciprocal of the second fraction. So, $\\dfrac{17}{5} :\\dfrac{34}{3}$ becomes $\\dfrac{17}{5} \\times \\dfrac{3}{34}$.\n3. Simplify the fractions: $\\dfrac{17}{5} \\times \\dfrac{3}{34}$ simplifies to $\\dfrac{51}{170}$.\n4. Now, we add 1.3 to the result of the division. So, $\\dfrac{51}{170} + 1.3$.\n5. To add a fraction and a decimal, we first convert the decimal to a fraction. $1.3$ is equivalent to $\\dfrac{13}{10}$.\n6. Now, we have $\\dfrac{51}{170} + \\dfrac{13}{10}$. To add fractions with different denominators, we find a common denominator. The least common multipl

In [11]:
results = map_progress(pool, rows, process_row)
df_results = pd.DataFrame(results)
df_results

  0%|          | 0/5 [00:00<?, ?it/s]

Unnamed: 0,problem_id,problem_text,llm_reasoning,answer
0,2374,Find the value of the expression $\dfrac{17}{5...,"1. First, we need to understand that the probl...",85.0
1,4723,"In a company of 30 people, 25 use the social n...",1. Let's denote the number of people who use b...,24.0
2,7135,The number of road traffic accidents (RTAs) in...,1. Let's denote the number of road traffic acc...,32.0
3,5814,Find the value of the expression $\dfrac{2\str...,"1. First, we simplify the numerator by adding ...",256.0
4,9237,A traveler from Moscow wants to visit four cit...,1. **Identify the routes that cover all four c...,36.0


In [12]:
import pandas as pd
import numpy as np


class ParticipantVisibleError(Exception):
    # If you want an error message to be shown to participants, you must raise the error as a ParticipantVisibleError
    # All other errors will only be shown to the competition host. This helps prevent unintentional leakage of solution data.
    pass

def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    '''
    Accuracy that works with multiple correct answers.
    '''
    solution = solution.set_index(row_id_column_name, drop=True)
    submission = submission.set_index(row_id_column_name, drop=True)
    submission = submission.loc[solution.index]
        
    target_column = 'answer'
    assert target_column in solution.columns
    assert target_column in submission.columns

    # This fix is needed because submission is loaded with default parameters
    # Pandas magically converts string column into float
    def fix_suffix(value):
        if value.endswith('.0'):
            return value[:-2]
        else:
            return value
    submission[target_column] = submission[target_column].astype(str)
    submission[target_column] = submission[target_column].apply(fix_suffix)
    
    
    def convert_to_list(value):
        values = [v.strip() for v in value.strip().lstrip('[').rstrip(']').split(',')]
        return values

    solution[target_column] = solution[target_column].astype(str).apply(convert_to_list)

    correct = [
        submit_answer in correct_answer
        for correct_answer, submit_answer in zip(
            solution[target_column].values, 
            submission[target_column].values
        )
    ]
            
    return np.mean(correct)    

In [13]:
score(df_train.head(), df_results, 'problem_id')

0.6

In [14]:
def prepare_prompts_and_get_answers(df):
    rows = df.to_dict(orient='records')
    results = map_progress(pool, rows, process_row)
    return pd.DataFrame(results)

In [None]:
df_train_results = prepare_prompts_and_get_answers(df_train)
df_train_results.head()

  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
score(df_train, df_train_results, 'problem_id')

In [None]:
df_test = pd.read_csv('/kaggle/input/llm-zoomcamp-2024-competition/test.csv')

df_test_results = prepare_prompts_and_get_answers(df_test)

submission = df_test_results[['problem_id', 'answer']]
submission.to_csv('submission.csv', index=False)