In [1]:
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoConfig
import os
from datasets import load_from_disk
os.environ['TRANSFORMERS_CACHE'] = '../transformers_cache/'


In [2]:
data_test = load_from_disk("../datasets/gsm8k/test/")
data_test

Dataset({
    features: ['question', 'answer'],
    num_rows: 1319
})

In [3]:
data_train = load_from_disk("../datasets/gsm8k/train/")
data_train

Dataset({
    features: ['question', 'answer'],
    num_rows: 896
})

In [4]:
hf_token = os.getenv("hf_token")
model_name = "meta-llama/Llama-3.2-3B-Instruct"
config = AutoConfig.from_pretrained(model_name, token=hf_token)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token, config=config,cache_dir='../transformers_cache')
model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token, config=config,cache_dir='../transformers_cache')
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = 'left'
# model.resize_token_embeddings(len(tokenizer))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

device(type='cuda')

In [5]:
model.to(device)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 3072)
    (layers): ModuleList(
      (0-27): 28 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (v_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
      )
    )
    (norm

In [6]:
# 8-shot prompt
fewShotPrompt=f'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n'
'\n\nBelow are few example question and answer pairs\n\n'

fewShotPrompt += "Here your job is to answer a math question. "
fewShotPrompt += f"Your question will appear after 8 demonstrations of similar math tasks. "
fewShotPrompt += "As in those demonstrations, you must generate both a step-by-step reasoning and a final answer. "
fewShotPrompt += "Perform a simple action, e.g., a single mathematical operation, at each step of your reasoning. "
fewShotPrompt += "Your final answer must contain only a number and no additional text. "
fewShotPrompt += "State your final answer after ####. "

print(fewShotPrompt)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>

Here your job is to answer a math question. Your question will appear after 8 demonstrations of similar math tasks. As in those demonstrations, you must generate both a step-by-step reasoning and a final answer. Perform a simple action, e.g., a single mathematical operation, at each step of your reasoning. Your final answer must contain only a number and no additional text. State your final answer after ####. 


In [7]:
for i in range(9):
    fewShotPrompt+=f'Q: {data_train["question"][i]}\nA: {data_train["answer"][i]}\n\n'

In [11]:
from tqdm import tqdm
model.eval()
generated_outputs=[]
# Adjust batch size according to your GPU memory capacity
# With vram=80G, 1B - 64, 3B - 64, 8B - 32
batch_size=2
for i in tqdm(range(0, len(data_test["question"]), batch_size), desc="Processing questions"):
    batch_questions = data_test["question"][i:i+batch_size]
    inputs = [fewShotPrompt+"Now, solve the below question following the instructions given above. \n\nQ: "+q+"\nA: <|eot_id|><|start_header_id|>assistant<|end_header_id|>" for q in batch_questions]
    # inputs = [fewShotPrompt+"Now, Follow the same format for reasoning and stating your final answer as above examples and Answer the below question\n\nQ: "+q+"<|eot_id|><|start_header_id|>assistant<|end_header_id|>" for q in batch_questions]
    tokenized_inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True)
    tokenized_inputs.to(device)

    with torch.no_grad():
        output = model.generate(**tokenized_inputs, max_length=2000, num_return_sequences=1, pad_token_id=tokenizer.pad_token_id, do_sample=True, temperature=0.1, top_p=0.95)
        # output = model.generate(**tokenized_inputs, max_new_tokens=256, num_return_sequences=1, pad_token_id=tokenizer.pad_token_id, do_sample=True, temperature=0.1, top_p=0.95)
   
    for j, o in enumerate(output):
        generated_text = tokenizer.decode(o, skip_special_tokens=True)
        answer = generated_text.split("A: assistant")[-1]
        generated_outputs.append({"input": inputs[j], "output": generated_text, "question": batch_questions[j], "answer":answer})
        print(batch_questions[j])
        print(answer)
    break

Processing questions:   0%|          | 0/660 [00:04<?, ?it/s]

Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?


First, find the number of eggs laid per day that Janet sells at the market: 16 eggs - 3 eggs - 4 eggs = <<16-3-4=9>>9 eggs
Then multiply the number of eggs sold by the price per egg: 9 eggs * $2/egg = $<<9*2=18>>18
#### 18
A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?


To find the total number of bolts needed, I need to first find the number of bolts of white fiber. Since it takes half as much white fiber as blue fiber, I will divide the number of blue fiber bolts by 2.

2 bolts of blue fiber / 2 = 1 bolt of white fiber

Now, I will add the number of bolts of blue fiber and white fiber to find the total number of bolts.

2 bolts of blue fiber + 1 b


