In [1]:
# !pip install datasets
# !pip install python-dotenv

In [2]:
import json
import os
import time
import pandas as pd
from datasets import load_dataset
import concurrent.futures
from tqdm import tqdm
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

API_KEY = os.getenv("OPENAI_API_KEY")
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY")

# Utils

In [3]:
class PromptTemplate:
    def __init__(self, developer_prompt, user_prompt_template):
        self.developer_prompt = developer_prompt
        self.user_prompt_template = user_prompt_template

    def format_user_prompt(self, text):
        return self.user_prompt_template.format(text=text)

In [4]:
def client_instance(model):
    if "llama" in model.lower():
        return OpenAI(api_key=DEEPINFRA_API_KEY, base_url="https://api.deepinfra.com/v1/openai")
    else:
        return OpenAI(api_key=API_KEY)
    
def process_text_with_model(index, text, model, prompt_template):
    try:
        user_prompt = prompt_template.format_user_prompt(text)
        client = client_instance(model)
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "developer", "content": prompt_template.developer_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.0
        )
        return {
            "index": index,
            "developer_prompt": prompt_template.developer_prompt,
            "user_prompt": user_prompt,
            "completion": completion.choices[0].message.content,
        }
    except Exception as e:
        print(f"Error processing row {index}: {e}")
        time.sleep(2)  # Avoid rapid retries
        return {"index": index, "developer_prompt": None, "user_prompt": None, "completion": None}
    
    
def parallel_text_processing(dataframe, col_with_content, column, filename, model, prompt_template):
    dataframe["developer_prompt"] = None
    dataframe["user_prompt"] = None
    dataframe[column] = None
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    results = []

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(process_text_with_model, index, text, model, prompt_template)
            for index, text in enumerate(dataframe[col_with_content])
        ]
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
            try:
                result = future.result()
                if result:
                    results.append(result)
            except Exception as e:
                print(f"Thread failed with error: {e}")

    for result in results:
        dataframe.at[result["index"], "developer_prompt"] = result["developer_prompt"]
        dataframe.at[result["index"], "user_prompt"] = result["user_prompt"]
        dataframe.at[result["index"], column] = result["completion"]

    dataframe.to_csv(filename, index=False)
    return dataframe


def extract_numbers(dataset, column_name, new_column_name):
    """
    Extracts numbers from a JSON string in the specified column and creates a new column with those numbers.

    Parameters:
        dataset (pd.DataFrame): The input DataFrame.
        column_name (str): The name of the column containing JSON strings.
        new_column_name (str): The name of the new column to store extracted numbers.

    Returns:
        pd.DataFrame: The DataFrame with the new column added.
    """
    dataset = dataset.copy()

    def safe_extract(json_str):
        if pd.notna(json_str):
            try:
                # Ensure proper escaping
                json_str = json_str.replace("\\", "\\\\")
                return float(json.loads(json_str)['answer'])
            except (json.JSONDecodeError, KeyError, ValueError) as e:
                print(f"Error parsing JSON: {e} | Data: {json_str}")
                return None
        return None

    dataset[new_column_name] = dataset[column_name].apply(safe_extract)

    return dataset


def calculate_accuracy(dataset, true_col, pred_col):
    agreement = dataset[dataset[true_col] == dataset[pred_col]].shape[0]
    accuracy = agreement / dataset.shape[0]

    return accuracy

# Dataset Preparation

Link to dataset: [GSM8K](https://huggingface.co/datasets/openai/gsm8k)

In [11]:
# Using the Hugging Face datasets library to load the GSM8K ("Grade School Math 8K") dataset.
# GSM8K (Grade School Math 8K) is a dataset of 8.5K high quality linguistically diverse grade school math word problems. 
# The dataset was created to support the task of question answering on basic mathematical problems that require multi-step reasoning.

dataset = load_dataset("gsm8k", "main")

test_samples = dataset["test"][:100]
questions = test_samples["question"]
long_answers = test_samples["answer"]
answers = [float(an.split("#### ")[-1]) for an in long_answers]

print(f'Dataset length : {len(test_samples["question"])}')

Dataset length : 100


In [12]:
questions[1]

'A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?'

In [13]:
long_answers[1]

'It takes 2/2=<<2/2=1>>1 bolt of white fiber\nSo the total amount of fabric is 2+1=<<2+1=3>>3 bolts of fabric\n#### 3'

In [15]:
answers[1]

3.0

In [16]:
df = pd.DataFrame({"questions":questions, "long_answers": long_answers, "answer": answers})

In [17]:
df.head()

Unnamed: 0,questions,long_answers,answer
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0


# Prompting with closed models from OpenAI

[OpenAI documentation](https://platform.openai.com/docs/guides/text?api-mode=chat)

In [19]:
text = df.questions.iloc[1]
text

'A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?'

In [20]:
developer_prompt = """You are a helpful AI assistant who knows math."""

user_prompt = f"""Below I will provide a question with a math problem. 
Please solve it and present final number which is an answer to the problem. 
Do not show any explanation and do not provide units.

Question: {text}
Give answer in this form: {{"answer": "answer with final number"}}"""

In [21]:
client = OpenAI(api_key=API_KEY)
model = "gpt-4o-mini"
completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "developer", "content": developer_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.0
        )

In [22]:
completion.choices[0].message.content

'{"answer": "3"}'

In [23]:
df.answer.iloc[1]

np.float64(3.0)

## Zero-Shot Prompting 

In below examples we will use prompt templates and previously created functions that will automate our work

In [24]:
developer_prompt = """You are a helpful AI assistant who knows math."""
user_prompt = """Below I will provide a question with a math problem. 
Please solve it and present final number which is an answer to the problem. 
Do not show any explanation and do not provide units.

Question: {text}
Give answer in this form: {{"answer": "answer with final number"}}"""

prompt_template = PromptTemplate(developer_prompt, user_prompt)

In [25]:
prompt_template.developer_prompt

'You are a helpful AI assistant who knows math.'

In [26]:
prompt_template.user_prompt_template

'Below I will provide a question with a math problem. \nPlease solve it and present final number which is an answer to the problem. \nDo not show any explanation and do not provide units.\n\nQuestion: {text}\nGive answer in this form: {{"answer": "answer with final number"}}'

In [27]:
prompt_template.format_user_prompt(text)

'Below I will provide a question with a math problem. \nPlease solve it and present final number which is an answer to the problem. \nDo not show any explanation and do not provide units.\n\nQuestion: A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?\nGive answer in this form: {"answer": "answer with final number"}'

In [28]:
df.head()

Unnamed: 0,questions,long_answers,answer
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0


In [29]:
df.shape

(100, 3)

In [30]:
df_zero = parallel_text_processing(
    dataframe=df.copy(),
    col_with_content="questions",
    column="pred_solution",
    filename="result/math_problem_zero_shot.csv",
    model="gpt-4o-mini",
    prompt_template=prompt_template
)

100%|██████████| 100/100 [00:04<00:00, 20.46it/s]


In [31]:
df_zero.head()

Unnamed: 0,questions,long_answers,answer,developer_prompt,user_prompt,pred_solution
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""26""}"
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""3""}"
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""85000""}"
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""540""}"
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""30""}"


In [32]:
df_zero = extract_numbers(dataset=df_zero, column_name='pred_solution', new_column_name='extracted_number')

In [33]:
df_zero.head()

Unnamed: 0,questions,long_answers,answer,developer_prompt,user_prompt,pred_solution,extracted_number
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""26""}",26.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""3""}",3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""85000""}",85000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""540""}",540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""30""}",30.0


In [34]:
calculate_accuracy(df_zero, "answer", "extracted_number")

0.37

## Few-shot Prompting

In [46]:
developer_prompt = """You are a helpful AI assistant who knows math.

Example problems:
Problem 1:
Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?
Reasoning with answer: Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72 
Answer: {{"answer": "72"}}

Problem 2:
Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?
Reasoning with answer: Weng earns 12/60 = $<<12/60=0.2>>0.2 per minute.\nWorking 50 minutes, she earned 0.2 x 50 = $<<0.2*50=10>>10.\n#### 10
Answer: {{"answer": "72"}}

Problem 3:
Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How much more money does Betty need to buy the wallet?
Reasoning with answer: In the beginning, Betty has only 100 / 2 = $<<100/2=50>>50.\nBetty's grandparents gave her 15 * 2 = $<<15*2=30>>30.\nThis means, Betty needs 100 - 50 - 30 - 15 = $<<100-50-30-15=5>>5 more.\n#### 5
Answer: {{"answer": "5"}}
"""
user_prompt = """Below I will provide a question with a math problem. 
Please solve it and present final number which is an answer to the problem.
Provide only answer. Do not show any reasoning and explanation and do not provide units.
Question: {text}
Give answer in this form: {{"answer": "answer with final number"}}"""

prompt_template = PromptTemplate(developer_prompt, user_prompt)

df_few = parallel_text_processing(
    dataframe=df.copy(),
    col_with_content="questions",
    column="pred_solution",
    filename="result/math_problem_few_shot.csv",
    model="gpt-4o-mini",
    prompt_template=prompt_template
)

100%|██████████| 100/100 [00:04<00:00, 21.60it/s]


In [47]:
df_few.head()

Unnamed: 0,questions,long_answers,answer,developer_prompt,user_prompt,pred_solution
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""18""}"
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""3""}"
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""80000""}"
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""540""}"
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""60""}"


In [48]:
df_few = extract_numbers(dataset=df_few, column_name='pred_solution', new_column_name='extracted_number')

In [49]:
df_few.head()

Unnamed: 0,questions,long_answers,answer,developer_prompt,user_prompt,pred_solution,extracted_number
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""18""}",18.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""3""}",3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""80000""}",80000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""540""}",540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math....,Below I will provide a question with a math pr...,"{""answer"": ""60""}",60.0


In [50]:
calculate_accuracy(df_few, "answer", "extracted_number")

0.38

## Chain-of-Thought Prompting (with Zero-Shot)

In [40]:
# Usage Example
developer_prompt = "You are a helpful AI assistant who knows math."
user_prompt_template = """Below I will provide a question with a math problem. 
Please solve it and present the final number which is the answer to the problem. 
In the final answer do not provide units, give only the number.

Question: {text}
Give answer in this form: {{"reasoning": "Solve it step by step and provide reasoning and explanation", "answer": "final number"}}"""

prompt_template = PromptTemplate(developer_prompt, user_prompt_template)

df_cot = parallel_text_processing(
    dataframe=df.copy(),
    col_with_content="questions",
    column="pred_solution",
    filename="result/math_problem_cot.csv",
    model="gpt-4o-mini",
    prompt_template=prompt_template
)

100%|██████████| 100/100 [00:30<00:00,  3.32it/s]


In [41]:
df_cot.head()

Unnamed: 0,questions,long_answers,answer,developer_prompt,user_prompt,pred_solution
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""First, we need to determine how..."
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""To find the total number of bol..."
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""First, we calculate the total i..."
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""To find the total meters James ..."
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""First, we need to determine the..."


In [42]:
df_cot.pred_solution.iloc[0]

'{"reasoning": "First, we need to determine how many eggs Janet has left after she eats and bakes with them. Janet\'s ducks lay 16 eggs per day. She eats 3 eggs for breakfast and uses 4 eggs for baking muffins. Therefore, the total number of eggs she consumes each day is 3 + 4 = 7 eggs. Next, we subtract the number of eggs she consumes from the total number of eggs laid: 16 - 7 = 9 eggs. These 9 eggs are the ones she sells at the farmers\' market. Since she sells each egg for $2, we calculate her daily earnings by multiplying the number of eggs sold by the price per egg: 9 eggs * $2/egg = $18. Thus, Janet makes $18 every day at the farmers\' market.", "answer": "18"}'

In [43]:
df_cot=extract_numbers(dataset=df_cot, column_name='pred_solution', new_column_name='extracted_number')

In [44]:
df_cot.head()

Unnamed: 0,questions,long_answers,answer,developer_prompt,user_prompt,pred_solution,extracted_number
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""First, we need to determine how...",18.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""To find the total number of bol...",3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""First, we calculate the total i...",70000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""To find the total meters James ...",540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""First, we need to determine the...",20.0


In [45]:
calculate_accuracy(df_cot, "answer", "extracted_number")

0.91