## Importing all the required libraries

In [1]:
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoConfig
import os
from datasets import load_from_disk
os.environ['TRANSFORMERS_CACHE'] = '/scratch/workspace/wenlongzhao_umass_edu-analyze/transformers_cache'



## Loading the val dataset

In [2]:
data_feedback = load_from_disk("../datasets/gsm8k/feedback/")
data_feedback

Dataset({
    features: ['question', 'answer'],
    num_rows: 5082
})

In [3]:
data_train = load_from_disk("../datasets/gsm8k/train/")
data_train

Dataset({
    features: ['question', 'answer'],
    num_rows: 896
})

## Loading the model

In [4]:
hf_token = "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"
model_name = "meta-llama/Llama-3.2-3B-Instruct"
config = AutoConfig.from_pretrained(model_name, token=hf_token)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token, config=config,cache_dir='/scratch/workspace/wenlongzhao_umass_edu-analyze/transformers_cache')
model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token, config=config,cache_dir='/scratch/workspace/wenlongzhao_umass_edu-analyze/transformers_cache')
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = 'left'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

device(type='cuda')

In [6]:
model.to(device)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 3072)
    (layers): ModuleList(
      (0-27): 28 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (v_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (o_proj): Linear(in_features=3072, out_features=3072, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
      )
    )
    (norm

In [6]:
def create_prompt(question):
    prompt="Answer the below question: \n\n Question:\n\n"+question
    return prompt

In [7]:
# import random
# def create_fs_prompt(question,n,x):
#     '''
#     question: The question we want the model to answer
#     n: The number of examples for few shot
#     x: The data from which we get the few shot examples.
#     '''
#     prompt=""
#     for i in range(n):
#         ind=random.randint(0, len(x))
#         prompt+="Question: "+x["question"][ind]+"\n\nAnswer: "+x["answer"][ind]+"\n\n"
#     prompt+="Question: "+question
#     return prompt
    
        
        
    

In [7]:
fs8 = ""

for i in range(9):
        fs8+="Question: "+data_train["question"][i]+"\nAnswer: "+data_train["answer"][i]+"\n"

In [24]:
# import random
# def create_fs_prompt(question,n,x):
#     '''
#     question: The question we want the model to answer
#     n: The number of examples for few shot
#     x: The data from which we get the few shot examples.
#     '''
#     prompt=""
#     ind=random.randint(0, len(x)-9)
#     for i in range(ind,ind+9):
        
#         prompt+="Question: "+x["question"][i]+"\n\nAnswer: "+x["answer"][i]+"\n\n"
#     prompt+="Question: "+question
#     return prompt
    
        
        
    

In [9]:
import random
def create_fs_prompt(question,fs8):
    '''
    question: The question we want the model to answer
    n: The number of examples for few shot
    x: The data from which we get the few shot examples.
    '''
    prompt=""+fs8
    prompt+="Question: "+question
    return prompt
    
        
        
    

In [31]:
# input_text = create_fs_prompt(data_feedback["question"][0],5,data_train)
input_text = create_fs_prompt(data_feedback["question"][0],fs8)

In [32]:
print(input_text)

Question: Bobby wanted pancakes for breakfast.  The recipe on the box makes 21 pancakes.  While he ate 5 pancakes, his dog jumped up and was able to eat 7 before being caught.  How many pancakes does Bobby have left?

Answer: Bobby ate 5 pancakes and his dog ate 7 so 5+7 = <<5+7=12>>12
The recipe makes 21 pancakes and 12 were eaten so 21-12 = <<21-12=9>>9 pancakes were left
#### 9

Question: A store gives a 10% discount for the amount of the sell that was over $1000.  John buys 7 items that each cost $200.  What does his order cost after the discount?

Answer: His order came out to 7*200=$<<7*200=1400>>1400
So there was 1400-1000=$<<1400-1000=400>>400 that qualified for the discount
So his discount saved 400*.1=$<<400*.1=40>>40
So his purchase came out to 1400-40=$<<1400-40=1360>>1360
#### 1360

Question: An agricultural cooperative must ship 6500 kg of potatoes. During transport by truck, 150 kg are damaged and therefore cannot be sold. The potatoes are distributed in 50 kg bags, each

In [8]:
input_text=""+fs8
input_text+="Question: "+data_feedback["question"][9]


In [9]:
print(input_text)

Question: Bobby wanted pancakes for breakfast.  The recipe on the box makes 21 pancakes.  While he ate 5 pancakes, his dog jumped up and was able to eat 7 before being caught.  How many pancakes does Bobby have left?
Answer: Bobby ate 5 pancakes and his dog ate 7 so 5+7 = <<5+7=12>>12
The recipe makes 21 pancakes and 12 were eaten so 21-12 = <<21-12=9>>9 pancakes were left
#### 9
Question: A store gives a 10% discount for the amount of the sell that was over $1000.  John buys 7 items that each cost $200.  What does his order cost after the discount?
Answer: His order came out to 7*200=$<<7*200=1400>>1400
So there was 1400-1000=$<<1400-1000=400>>400 that qualified for the discount
So his discount saved 400*.1=$<<400*.1=40>>40
So his purchase came out to 1400-40=$<<1400-40=1360>>1360
#### 1360
Question: An agricultural cooperative must ship 6500 kg of potatoes. During transport by truck, 150 kg are damaged and therefore cannot be sold. The potatoes are distributed in 50 kg bags, each bag

In [11]:
input_text.splitlines()

['Question: Bobby wanted pancakes for breakfast.  The recipe on the box makes 21 pancakes.  While he ate 5 pancakes, his dog jumped up and was able to eat 7 before being caught.  How many pancakes does Bobby have left?',
 'Answer: Bobby ate 5 pancakes and his dog ate 7 so 5+7 = <<5+7=12>>12',
 'The recipe makes 21 pancakes and 12 were eaten so 21-12 = <<21-12=9>>9 pancakes were left',
 '#### 9',
 'Question: A store gives a 10% discount for the amount of the sell that was over $1000.  John buys 7 items that each cost $200.  What does his order cost after the discount?',
 'Answer: His order came out to 7*200=$<<7*200=1400>>1400',
 'So there was 1400-1000=$<<1400-1000=400>>400 that qualified for the discount',
 'So his discount saved 400*.1=$<<400*.1=40>>40',
 'So his purchase came out to 1400-40=$<<1400-40=1360>>1360',
 '#### 1360',
 'Question: An agricultural cooperative must ship 6500 kg of potatoes. During transport by truck, 150 kg are damaged and therefore cannot be sold. The potato

In [None]:
import json
from tqdm import tqdm
model.eval()
generated_outputs=[]
cnt=0
batch_size=32 # Adjust batch size according to your GPU memory capacity
for i in tqdm(range(0, len(data_feedback["question"]), batch_size), desc="Processing questions"):
    batch_questions = data_feedback["question"][i:i+batch_size]
    inputs = [fs8+"Question: "+q for q in batch_questions]
    tokenized_inputs = tokenizer(inputs, return_tensors="pt", padding=True, truncation=True)
    tokenized_inputs.to(device)

    with torch.no_grad():
        output = model.generate(**tokenized_inputs, max_length=2000, num_return_sequences=1, pad_token_id=tokenizer.pad_token_id)
   
    for j, o in enumerate(output):
        cnt+=1
        generated_text = tokenizer.decode(o, skip_special_tokens=True)
        # print(generated_text,"\n________________________________________\n\n")
        lines=generated_text.splitlines()
        linenumber = [i for i, line in enumerate(lines) if line.startswith("Question: ")][-1]
        answer = '\n'.join(lines[linenumber:])
        print(answer)
        generated_outputs.append({"input": inputs[j], "output": generated_text})
        break
    break


Processing questions:   0%|          | 0/159 [00:00<?, ?it/s]Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
Processing questions:   0%|          | 0/159 [02:16<?, ?it/s]


AttributeError: 'list' object has no attribute 'join'

In [16]:
data_feedback["question"][4]

'Mr. Smith takes his wife, his parents, and his 3 children to a seafood buffet.  The price for the adult buffet is $30.  The price for the children’s buffet is $15.  Senior citizens get a 10% discount.  How much does Mr. Smith spend on the buffet for his entire family?'

In [18]:
import json
from tqdm import tqdm
model.eval()
generated_outputs=[]
batch_size=8
for i in tqdm(range(len(data_feedback["question"]),batch_size), desc="Processing questions"):
    # Create the prompt
    batch_questions = data_feedback["question"][i:i+batch_size]
    input_text = [fs8+"Question: "+q for q in batch_questions]
    # Tokenize input
    inputs = tokenizer(input_text, return_tensors="pt")
    inputs.to(device)
    # Generate output
    with torch.no_grad():
        output = model.generate(**inputs, max_length=2000, num_return_sequences=1,  pad_token_id=tokenizer.pad_token_id)
    # Decode the output
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    lines=generated_text.splitlines()
    linenumber = [i for i, line in enumerate(lines) if line.startswith("Question: ")][-1]
    answer = lines[linenumber:].join('\n')
    print(answer)
    # generated_outputs.append({"input":input_text,"output":})
    break

# with open("../outputs/gsm8k/generated_outputs.json", "w") as f:
#     json.dump(generated_outputs, f, indent=4)

    
    

Processing questions: 0it [00:00, ?it/s]


In [16]:
# input_text = create_prompt(data["question"][0])
# input_text
len(data_feedback["question"])

5082

In [3]:
data["answer"][0]

'The total revenue from the adults is 183 x $26 = $<<183*26=4758>>4758.\nThe ticket price for children is $26/2 = $<<26/2=13>>13.\nThe total revenue from the children is 28 x $13 = $<<28*13=364>>364.\nThe total revenue of the concert is $4758 + $364 = $<<4758+364=5122>>5122\n#### 5122'