In [1]:
import pandas as pd
from datasets import load_dataset
from sklearn.metrics import f1_score
from src.utils.utils import parallel_text_processing, extract_numbers, calculate_accuracy
from src.utils.utils import PromptTemplate

In [2]:
dataset = load_dataset("gsm8k", "main")

print(f'Dataset length : {len(dataset["test"])}')

test_samples = dataset["test"][:100]
questions = test_samples["question"]
long_answers = test_samples["answer"]
answers = [float(an.split("#### ")[-1]) for an in long_answers]

Dataset length : 1319


In [3]:
questions[0]

"Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"

In [4]:
long_answers[0]

'Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.\nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.\n#### 18'

In [5]:
answers[0]

18.0

In [6]:
df = pd.DataFrame({"questions":questions, "long_answers": long_answers, "answer": answers})

In [7]:
df.head()

Unnamed: 0,questions,long_answers,answer
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0


In [16]:
system_prompt = """You are a helpful AI assistant who knows math."""
user_prompt = """Below I will provide a question with a math problem. 
Please solve it and present final number which is an answer to the problem. 
Do not show any explanation and do not provide units.

Question: {text}
Give answer in this form: {{"answer": "answer with final number"}}"""

prompt_template = PromptTemplate(system_prompt, user_prompt)

df_zero = parallel_text_processing(
    dataframe=df.copy(),
    col_with_content="questions",
    column="pred_solution",
    filename="test/math_problem_zero_shot.csv",
    model="gpt-4o-mini",
    prompt_template=prompt_template
)

100%|██████████| 100/100 [00:12<00:00,  7.97it/s]


In [20]:
df_zero.head()

Unnamed: 0,questions,long_answers,answer,system_prompt,user_prompt,pred_solution,extracted_number
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""26""}",26.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""3""}",3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""65000""}",65000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""540""}",540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""30""}",30.0


In [18]:
df_zero = extract_numbers(dataset=df_zero, column_name='pred_solution', new_column_name='extracted_number')

In [21]:
df_zero.head()

Unnamed: 0,questions,long_answers,answer,system_prompt,user_prompt,pred_solution,extracted_number
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""26""}",26.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""3""}",3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""65000""}",65000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""540""}",540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""30""}",30.0


In [22]:
calculate_accuracy(df_zero, "answer", "extracted_number")

Accuracy achieved is equal to 0.37


In [23]:
system_prompt = """You are a helpful AI assistant who knows math."""
user_prompt = """Below I will provide a question with a math problem. 
Please solve it and present final number which is an answer to the problem.
Example problems:

Problem 1:
Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?
Reasoning with answer: Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72 
Answer: {{"answer": "72"}}

Problem 2:
Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?
Reasoning with answer: Weng earns 12/60 = $<<12/60=0.2>>0.2 per minute.\nWorking 50 minutes, she earned 0.2 x 50 = $<<0.2*50=10>>10.\n#### 10
Answer: {{"answer": "72"}}

Problem 3:
Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How much more money does Betty need to buy the wallet?
Reasoning with answer: In the beginning, Betty has only 100 / 2 = $<<100/2=50>>50.\nBetty's grandparents gave her 15 * 2 = $<<15*2=30>>30.\nThis means, Betty needs 100 - 50 - 30 - 15 = $<<100-50-30-15=5>>5 more.\n#### 5
Answer: {{"answer": "5"}}


Now your turn to solve a problem. Provide only answer. Do not show any reasoning and explanation and do not provide units.
Question: {text}
Give answer in this form: {{"answer": "answer with final number"}}"""

prompt_template = PromptTemplate(system_prompt, user_prompt)

df_few = parallel_text_processing(
    dataframe=df.copy(),
    col_with_content="questions",
    column="pred_solution",
    filename="test/math_problem_few_shot.csv",
    model="gpt-4o-mini",
    prompt_template=prompt_template
)

100%|██████████| 100/100 [00:06<00:00, 14.70it/s]


In [24]:
df_few.head()

Unnamed: 0,questions,long_answers,answer,system_prompt,user_prompt,pred_solution
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""18""}"
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""3""}"
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""65000""}"
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""540""}"
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""30""}"


In [25]:
df_few = extract_numbers(dataset=df_few, column_name='pred_solution', new_column_name='extracted_number')

In [26]:
df_few.head()

Unnamed: 0,questions,long_answers,answer,system_prompt,user_prompt,pred_solution,extracted_number
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""18""}",18.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""3""}",3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""65000""}",65000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""540""}",540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""answer"": ""30""}",30.0


In [27]:
calculate_accuracy(df_few, "answer", "extracted_number")

Accuracy achieved is equal to 0.38


In [33]:
# Usage Example
system_prompt = "You are a helpful AI assistant who knows math."
user_prompt_template = """Below I will provide a question with a math problem. 
Please solve it and present the final number which is the answer to the problem. 
In the final answer do not provide units, give only the number.

Question: {text}
Give answer in this form: {{"reasoning": "Solve it step by step and provide reasoning and explanation", "answer": "final number"}}"""

prompt_template = PromptTemplate(system_prompt, user_prompt_template)

df_cot = parallel_text_processing(
    dataframe=df.copy(),
    col_with_content="questions",
    column="pred_solution",
    filename="test/math_problem_cot.csv",
    model="gpt-4o-mini",
    prompt_template=prompt_template
)

100%|██████████| 100/100 [00:38<00:00,  2.60it/s]


In [34]:
df_cot.head()

Unnamed: 0,questions,long_answers,answer,system_prompt,user_prompt,pred_solution
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""First, we need to determine how..."
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""To find the total number of bol..."
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""First, we calculate the total i..."
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""To find the total meters James ..."
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0,You are a helpful AI assistant who knows math.,Below I will provide a question with a math pr...,"{""reasoning"": ""First, we need to determine the..."


In [35]:
df_cot.pred_solution.iloc[0]

'{"reasoning": "First, we need to determine how many eggs Janet has left after she eats and bakes with them. Janet\'s ducks lay 16 eggs per day. She eats 3 eggs for breakfast and uses 4 eggs for baking muffins. Therefore, the total number of eggs she consumes each day is 3 + 4 = 7 eggs. Next, we subtract the number of eggs she consumes from the total number of eggs laid: 16 - 7 = 9 eggs. These 9 eggs are the ones she sells at the farmers\' market. Since she sells each egg for $2, we calculate her daily earnings by multiplying the number of eggs sold by the price per egg: 9 eggs * $2/egg = $18. Thus, Janet makes $18 every day at the farmers\' market.", "answer": "18"}'

In [36]:
df_cot=extract_numbers(dataset=df_cot, column_name='pred_solution', new_column_name='extracted_number')

In [37]:
calculate_accuracy(df_cot, "answer", "extracted_number")

Accuracy achieved is equal to 0.94


# Disinformation detection with GPT

In [7]:
df = pd.read_csv("../data/ECTF/test.csv")

In [8]:
df.head()

Unnamed: 0,id,content,label
0,1255615613923450882,Luc Montagnier won the Nobel Prize for for his...,real
1,1236143519133077504,If you pumped soap from those public soap disp...,fake
2,1265282794831392768,🔴 LIVE at 11:30 am: @GovernorTomWolf + @Secret...,real
3,1234899473882664960,When’s this coronavirus vaccine coming out?,fake
4,1248951179393904640,MTA employees have contracted COVID-19 at thre...,real


In [10]:
# Usage Example
system_prompt = "You are a helpful AI assistant who detects disinformation"
user_prompt_template = """Below I will provide a short text. 
Please provide answer if it is fake or real information.

Text: {text}
Give answer in this form: {{"answer": "fake or real"}}"""

prompt_template = PromptTemplate(system_prompt, user_prompt_template)

In [None]:
df_dis = parallel_text_processing(
    dataframe=df.copy(),
    col_with_content="content",
    column="pred_solution",
    filename="test/disinformation_classification_zero_shot.csv",
    model="gpt-4o-mini",
    prompt_template=prompt_template
)

In [16]:
df_dis.head()

Unnamed: 0,id,content,label,system_prompt,user_prompt,pred_solution
0,1255615613923450882,Luc Montagnier won the Nobel Prize for for his...,real,You are a helpful AI assistant who detects dis...,Below I will provide a short text. \nPlease pr...,"{""answer"": ""real""}"
1,1236143519133077504,If you pumped soap from those public soap disp...,fake,You are a helpful AI assistant who detects dis...,Below I will provide a short text. \nPlease pr...,"{""answer"": ""fake""}"
2,1265282794831392768,🔴 LIVE at 11:30 am: @GovernorTomWolf + @Secret...,real,You are a helpful AI assistant who detects dis...,Below I will provide a short text. \nPlease pr...,"{""answer"": ""real""}"
3,1234899473882664960,When’s this coronavirus vaccine coming out?,fake,You are a helpful AI assistant who detects dis...,Below I will provide a short text. \nPlease pr...,"{""answer"": ""real""}"
4,1248951179393904640,MTA employees have contracted COVID-19 at thre...,real,You are a helpful AI assistant who detects dis...,Below I will provide a short text. \nPlease pr...,"{""answer"": ""real""}"


In [18]:
y_pred = df_dis.pred_solution.apply(lambda x: 1 if "fake" in x.lower() else 0)
y_true = df_dis.label.apply(lambda x: 1 if "fake" in x.lower() else 0)
f1_score(y_true, y_pred, average="micro")

In [11]:
df_dis_llama = parallel_text_processing(
    dataframe=df.copy(),
    col_with_content="content",
    column="pred_solution",
    filename="result/disinformation_classification_zero_shot.csv",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    prompt_template=prompt_template
)

100%|██████████| 400/400 [00:30<00:00, 13.05it/s]


In [12]:
y_pred = df_dis_llama.pred_solution.apply(lambda x: 1 if "fake" in x.lower() else 0)
y_true = df_dis_llama.label.apply(lambda x: 1 if "fake" in x.lower() else 0)
f1_score(y_true, y_pred, average="micro")

0.8225