In [7]:
# !pip install datasets

In [1]:
import torch
from tqdm import tqdm
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from datasets import load_dataset

## Utils

In [2]:
class PromptTemplate:
    def __init__(self, system_prompt, user_prompt_template):
        self.system_prompt = system_prompt
        self.user_prompt_template = user_prompt_template

    def format_user_prompt(self, text):
        return self.user_prompt_template.format(text=text)

In [4]:
torch.random.manual_seed(0)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

In [50]:
dataset = load_dataset("gsm8k", "main")

print(f'Dataset length : {len(dataset["test"])}')

test_samples = dataset["test"][:100]
questions = test_samples["question"]
long_answers = test_samples["answer"]
answers = [float(an.split("#### ")[-1]) for an in long_answers]

Dataset length : 1319


In [51]:
df = pd.DataFrame({"questions":questions, "long_answers": long_answers, "answer": answers})

In [41]:
system_prompt = """You are a helpful AI assistant who knows math."""
user_prompt = """Below I will provide a question with a math problem.
Please solve it and present final number which is an answer to the problem.
Do not show any explanation and do not provide units.

Question: {text}
Give answer in this form: {{"answer": "answer with final number"}}"""

prompt_template = PromptTemplate(system_prompt, user_prompt)
system_prompt = prompt_template.system_prompt

In [33]:
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "temperature": 0.0
}

Device set to use cuda


In [None]:
%%capture hidden_tests
import hashlib

def run_hidden_tests():
    try:
        # Access student's variable dynamically
        if "outputs_zero_shot" not in globals():
            print("❌ Test failed: Variable 'outputs_zero_shot' not found.")
            return
        
        result = globals()["outputs_zero_shot"]
        
        # Check if it's a list of 5 elements
        assert isinstance(result, list), "Output should be a list."
        assert len(result) == 5, "List should contain exactly 5 elements."
        
        # Check each element
        for element in result:
            assert isinstance(element, dict), "Each element should be a dictionary."
            assert len(element) == 1, "Each dictionary should contain exactly one key."
            assert "answer" in element, 'Dictionary key should be "answer".'
        
        # Hash-based tamper protection
        hash_value = hashlib.sha256("All tests passed!".encode()).hexdigest()
        assert hash_value == "2a40b0c7f36a22d6765c47397db228617d35d7a3a5d6bbbc6dc8fdac6f0b6db2", "Integrity check failed."
        
        print("✅ All tests passed.")
        
    except AssertionError as e:
        print(f"❌ Test failed: {e}")

In [44]:
df["answer"] = None
outputs_zero_shot = []
for index, question in tqdm(enumerate(df.questions.iloc[:5])):
  user_prompt = prompt_template.format_user_prompt(question)
  messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
  ]
  output = pipe(messages, **generation_args)
  outputs_zero_shot.append(output)
  print(output)

1it [00:01,  1.49s/it]

 {"answer": 64}


2it [00:02,  1.09s/it]

 {"answer": "5"}


3it [00:03,  1.17s/it]

 {"answer": 120000}


4it [00:04,  1.08s/it]

 {"answer": 540}


5it [00:05,  1.15s/it]

 {"answer": 10}





In [None]:
# Run the hidden tests
run_hidden_tests()

In [53]:
# Usage Example
system_prompt = "You are a helpful AI assistant who knows math."
user_prompt_template = """Below I will provide a question with a math problem.
Please solve it and present the final number which is the answer to the problem.
In the final answer do not provide units, give only the number.

Question: {text}
Give answer in this form: {{"reasoning": "Solve it step by step and provide reasoning and explanation", \n "answer": "final number"}}"""

prompt_template = PromptTemplate(system_prompt, user_prompt_template)

In [55]:
df["answer"] = None
outputs_cot = []
for index, question in tqdm(enumerate(df.questions.iloc[:5])):
  user_prompt = prompt_template.format_user_prompt(question)
  messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
  ]
  output = pipe(messages, **generation_args)
  outputs_cot.append(output)
  print(output)

1it [00:09,  9.83s/it]

 {"reasoning": "First, we need to find out how many duck eggs Janet has left after eating and baking. She starts with 16 eggs per day. She eats 3 for breakfast and uses 4 for baking muffins. So, 16 - 3 - 4 = 9 eggs are left. She sells these 9 eggs at the farmers' market for $2 each. To find out how much she makes daily, we multiply the number of eggs by the price per egg: 9 * 2 = 18. Therefore, Janet makes $18 every day at the farmers' market.", "answer": "18"}


2it [00:21, 10.77s/it]

 {"reasoning": "To solve this problem, we need to determine the amount of white fiber required and then add it to the amount of blue fiber. The robe takes 2 bolts of blue fiber. Since it takes half as much white fiber as blue fiber, we need to calculate half of 2 bolts. Half of 2 bolts is 1 bolt. Now, we add the blue fiber (2 bolts) and the white fiber (1 bolt) together to find the total number of bolts needed for the robe. So, 2 bolts (blue) + 1 bolt (white) = 3 bolts in total.", "answer": "3"}


3it [00:32, 11.18s/it]

 {"reasoning": "First, we need to calculate the increased value of the house after the repairs. Josh put in $50,000 in repairs, and this increased the value of the house by 150%. To find the increased value, we multiply the repair cost by the percentage increase: $50,000 * 1.5 = $75,000. Now, we add the increased value to the original purchase price to find the new value of the house: $80,000 + $75,000 = $155,000. To find the profit, we subtract the total cost (purchase price + repair cost) from the new value of the house: $155,000 - ($80,000 + $50,000) = $155,000 - $130,000 = $25,000. So, Josh made a profit of $25,000.", "answer": "25000"}


4it [00:37,  8.72s/it]

 {"reasoning": "To find the total distance James runs in a week, we need to multiply the number of sprints he runs per session by the distance of each sprint, and then multiply that by the number of sessions per week. So, we have 3 sprints per session * 60 meters per sprint * 3 sessions per week. This gives us a total of 540 meters per week.", "answer": "540"}


5it [00:58, 11.76s/it]

 {"reasoning": "To solve this problem, we need to find out how much feed each chicken gets in a day and then calculate the total amount of feed needed for the entire flock. 

First, let's find out how much feed each chicken gets in a day. Wendi feeds her chickens three cups of mixed chicken feed in three separate meals. So, each chicken gets 3 cups / 3 meals = 1 cup per meal.

Now, let's calculate the total amount of feed needed for the entire flock of 20 chickens. Since each chicken needs 1 cup of feed per meal, and there are 3 meals in a day, each chicken needs 1 cup/meal * 3 meals = 3 cups of feed per day.

For the entire flock of 20 chickens, the total amount of feed needed per day is 20 chickens * 3 cups/chicken = 60 cups of feed.

In the morning, Wendi gives her chickens 15 cups of feed, and in the afternoon, she gives them another 25 cups of feed. So, the total amount of feed given in the morning and afternoon is 15 cups + 25 cups = 40 cups of feed.

To find out how much feed We


