In [2]:
# !pip install -U datasets
# !pip install transformers==4.38.2

In [1]:
from tqdm import tqdm
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from datasets import load_dataset

## Utils

In [2]:
class PromptTemplate:

    def __init__(self, developer_prompt, user_prompt_template):
        self.developer_prompt = developer_prompt
        self.user_prompt_template = user_prompt_template

    def format_user_prompt(self, text):
        return self.user_prompt_template.format(text=text)

# Loading Pre-Trained Model

In [3]:
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)


tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# Loading Dataset

In [7]:
dataset = load_dataset("openai/gsm8k", "main")

print(f'Dataset length : {len(dataset["test"])}')

test_samples = dataset["test"][:5]
questions = test_samples["question"]
long_answers = test_samples["answer"]
answers = [float(an.split("#### ")[-1]) for an in long_answers]

Dataset length : 1319


In [8]:
df = pd.DataFrame({"questions":questions, "long_answers": long_answers, "answer": answers})

In [9]:
df.head()

Unnamed: 0,questions,long_answers,answer
0,Janet’s ducks lay 16 eggs per day. She eats th...,Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eg...,18.0
1,A robe takes 2 bolts of blue fiber and half th...,It takes 2/2=<<2/2=1>>1 bolt of white fiber\nS...,3.0
2,Josh decides to try flipping a house. He buys...,The cost of the house and repairs came out to ...,70000.0
3,James decides to run 3 sprints 3 times a week....,He sprints 3*3=<<3*3=9>>9 times\nSo he runs 9*...,540.0
4,"Every day, Wendi feeds each of her chickens th...","If each chicken eats 3 cups of feed per day, t...",20.0


# Zero-Shot Prompting

In [10]:
developer_prompt = """You are a helpful AI assistant who knows math."""
user_prompt = """Below I will provide a question with a math problem.
Please solve it and present final number which is an answer to the problem.
Do not show any explanation and do not provide units.

Question: {text}
Give answer in this form: {{"answer": "answer with final number"}}"""

prompt_template = PromptTemplate(developer_prompt, user_prompt)
developer_prompt = prompt_template.developer_prompt

In [11]:
messages = [
    {"role": "developer", "content": developer_prompt},
    {"role": "user", "content": user_prompt},
]

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "temperature": 0.0
}

In [12]:
for index, question in tqdm(enumerate(df.questions.iloc[:5])):
    user_prompt = prompt_template.format_user_prompt(question)
    messages = [
        {"role": "developer", "content": developer_prompt},
        {"role": "user", "content": user_prompt},
    ]
    output = pipe(messages, **generation_args)
    print("Raw output:", output[0]["generated_text"])

1it [00:02,  2.27s/it]

Raw output:  {"answer": 64}


2it [00:02,  1.31s/it]

Raw output:  {"answer": "5"}


3it [00:03,  1.11s/it]

Raw output:  {"answer": 120000}


4it [00:04,  1.03it/s]

Raw output:  {"answer": 540}


5it [00:05,  1.14s/it]

Raw output:  {"answer": 10}





# Zero Shot with Chain of Thought Prompting

In [13]:
# Usage Example
developer_prompt = "You are a helpful AI assistant who knows math."
user_prompt_template = """Below I will provide a question with a math problem.
Please solve it and present the final number which is the answer to the problem.
In the final answer do not provide units, give only the number.

Question: {text}
Give answer in this form: {{"reasoning": "Solve it step by step and provide reasoning and explanation", \n "answer": "final number"}}"""

prompt_template = PromptTemplate(developer_prompt, user_prompt_template)

In [14]:
for index, question in tqdm(enumerate(df.questions.iloc[:5])):
    user_prompt = prompt_template.format_user_prompt(question)
    messages = [
        {"role": "developer", "content": developer_prompt},
        {"role": "user", "content": user_prompt},
    ]
    output = pipe(messages, **generation_args)

    print("Raw output:", output[0]["generated_text"])

1it [00:07,  7.03s/it]

Raw output:  {"reasoning": "First, we need to find out how many duck eggs Janet has left after eating and baking. She starts with 16 eggs per day. She eats 3 for breakfast and uses 4 for baking muffins. So, 16 - 3 - 4 = 9 eggs are left. She sells these 9 eggs at the farmers' market for $2 each. To find out how much she makes, we multiply the number of eggs by the price per egg: 9 * 2 = 18. Therefore, Janet makes $18 every day at the farmers' market.", "answer": "18"}


2it [00:15,  7.83s/it]

Raw output:  {"reasoning": "To solve this problem, we need to find out how many bolts of white fiber are needed and then add that to the number of bolts of blue fiber. The robe requires 2 bolts of blue fiber. Since it takes half as much white fiber as blue fiber, we need to calculate half of 2 bolts. Half of 2 is 1. Now, we add the bolts of blue fiber (2) to the bolts of white fiber (1). The total number of bolts needed for the robe is 2 + 1 = 3.", "answer": "3"}


3it [00:33, 12.70s/it]

Raw output:  {"reasoning": "First, we need to calculate the increased value of the house after the repairs. The house was initially worth $80,000, and the repairs increased its value by 150%. To find the increased value, we multiply the initial value by the percentage increase: $80,000 * 1.5 = $120,000. Now, we add the increased value to the initial value to find the final value of the house: $80,000 + $120,000 = $200,000. Next, we need to calculate the total cost of the house and repairs: $80,000 (initial value) + $50,000 (repairs) = $130,000. Finally, we subtract the total cost from the final value to find the profit: $200,000 (final value) - $130,000 (total cost) = $70,000. So, Josh made a profit of $70,000.", "answer": "70000"}


4it [00:39,  9.76s/it]

Raw output:  {"reasoning": "To find the total distance James runs in a week, we need to multiply the number of sprints he runs per session by the distance of each sprint, and then multiply that by the number of sessions per week. So, we have 3 sprints per session * 60 meters per sprint * 3 sessions per week. This gives us a total of 540 meters per week.", "answer": "540"}


5it [00:53, 10.71s/it]

Raw output:  {"reasoning": "To solve this problem, we need to find out how much feed each chicken gets in total and then calculate the total amount of feed needed for the entire flock. Since Wendi feeds her chickens three cups of mixed chicken feed per day, and she has 20 chickens, we can calculate the total amount of feed needed for the entire flock as follows:

Total feed needed = (3 cups per chicken) * (20 chickens) = 60 cups

Now, we know that Wendi has already given her chickens 15 cups in the morning and 25 cups in the afternoon. To find out how much feed is needed for the final meal, we subtract the amount already given from the total amount needed:

Feed needed for the final meal = Total feed needed - (Morning feed + Afternoon feed)
Feed needed for the final meal = 60 cups - (15 cups + 25 cups)
Feed needed for the final meal = 60 cups - 40 cups
Feed needed for the final meal = 20 cups

So, Wendi needs to give her chickens 20 cups of feed in the final meal of the day.", 
 "answe


