### Import

In [1]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage 
import os
import pandas as pd
import re

#### Input data structure:

In [None]:
{'question': 'For every 12 ... ', 'answer': 'There are 144/12 = <<144/12=12>>12 ... \n#### 12'}

#### Ex 1:

In [2]:
data = []

with open('math_problems.jsonl', 'r', encoding='utf-8') as f_in:
    for line in f_in:
        data.append(eval(line))

data_df = pd.DataFrame(data)
data_df = data_df.explode(['question', 'answer'])

In [3]:
data_df['target'] = data_df['answer'].apply(lambda x: x.split('####')[-1].strip())

In [4]:
data_df.head()

Unnamed: 0,question,answer,target
0,"For every 12 cans you recycle, you receive $0....",There are 144/12 = <<144/12=12>>12 sets of 12 ...,12
1,Betty picked 16 strawberries. Matthew picked 2...,Matthew picked 16 + 20 = <<16+20=36>>36 strawb...,40
2,Jack has a stack of books that is 12 inches th...,There are 960 pages because 80 x 12 = <<80*12=...,160
3,James dumps his whole collection of 500 Legos ...,James starts with 500 Legos and uses half of t...,245
4,Ines had $20 in her purse. She bought 3 pounds...,Ines bought 3 pounds of peaches for 3 peaches ...,14


In [5]:
# Initialize the Mistral client with your API key
MODEL = "mistral-small"
TEMPERATURE = 0.0
TOP_P = 1

In [6]:
# Function to open and read the API key file
def open_file(filepath):
    with open(filepath, "r", encoding='utf-8') as infile:
        return infile.read()

# Function to interact with the Mistral API and get a response
def get_mistral_response(user_content):
    
    # Load api key
    api_key = open_file("mistral_api_key.txt")
    client = MistralClient(api_key=api_key)

    # Prepare a List of ChatMessage objects with the user's content
    messages = [ChatMessage(role="user", content=user_content)]
    
    # Get the response from the Mistral API without streaming
    chat_response = client.chat(
        model=MODEL, 
        temperature=TEMPERATURE,
        top_p=TOP_P,
        messages=messages,

        )
    
    try:
        # Extract the content from the response
        response_content = chat_response.choices[0].message.content if chat_response.choices else "" 
    except AttributeError as e:
        print(f"An error occurred while processing the response: {e}")
        response_content = ""
    
    # Return the response content
    return response_content


In [7]:
N_RECORD = 11

# Prompt
# user_content = prompt['question']
user_content = data_df.iloc[N_RECORD,0]

# Result
print("Full response:")
response = get_mistral_response(user_content)
print (f"{response}")
print('*'*100)
print("Final result: ")
new_prompt = 'Extract only one number which represent final result from the following text: ' + response
response_sum = re.findall(r'\d+', get_mistral_response(new_prompt))[0]
print (f"{response_sum}")
print('*'*100)
print(f"Expected output: {data_df.iloc[N_RECORD,2]}")

Full response:
To find out how many bunnies Michael has, we first need to determine how many pets are dogs and cats:

Dogs = Total pets * Percentage of dogs
         = 36 * 0.25
         = 9

Cats = Total pets * Percentage of cats
         = 36 * 0.5
         = 18

Now, let's subtract the number of dogs and cats from the total number of pets to find out how many bunnies Michael has:

Bunnies = Total pets - (Dogs + Cats)
              = 36 - (9 + 18)
              = 36 - 27
              = 9

So, Michael has 9 bunnies.
****************************************************************************************************
Final result: 
9
****************************************************************************************************
Expected output: 9


In [24]:
re_exp = r'\b\d+(?:\.\d+)?\b'
results = dict()
# Record the results into dataframe
for i, question in enumerate(data_df['question']):
    response_1 = get_mistral_response(question)
    response_2 = get_mistral_response(question)
    response_3 = get_mistral_response(question)

    new_prompt_1 = 'Return only the number which represent final result from the following text: ' + response_1
    new_prompt_2 = 'Return only the number which represent final result from the following text: ' + response_2
    new_prompt_3 = 'Return only the number which represent final result from the following text: ' + response_3

    output_1 = re.findall(re_exp, get_mistral_response(new_prompt_1))  
    output_2 = re.findall(re_exp, get_mistral_response(new_prompt_2))  
    output_3 = re.findall(re_exp, get_mistral_response(new_prompt_3))  

    results[i] = results.get(i, []) + output_1
    results[i] = results.get(i, []) + output_2
    results[i] = results.get(i, []) + output_3

    if i > 12:
        break

In [26]:
results

{0: ['12', '12', '12'],
 1: ['44', '44', '40'],
 2: ['160', '160', '160'],
 3: ['245', '245', '245'],
 4: ['14', '14', '14'],
 5: ['130', '130', '130'],
 6: ['3600', '3600', '3600'],
 7: ['30', '30', '30'],
 8: ['25', '25', '25'],
 9: ['586', '586', '586'],
 10: ['9.00', '30', '9.00', '9.00', '30', '9.00', '9.00', '30', '9.00'],
 11: ['9', '9', '9'],
 12: ['450', '90', '3', '450', '90', '3', '450', '90', '3'],
 13: ['7', '7', '7']}

In [None]:
data_df['predict'] = results

In [23]:
text = "John buys 30 cans with a normal price of $0.60."
numbers = re.findall(r'\b\d+(?:\.\d+)?\b', text)
numbers

['30', '0.60']

### Ways to reach better performance:
* Ensemble Method: involves combining the outputs of multiple models
* ...