# RFT - GSM8K
- Candidate: Eric Liu 

In [21]:
%load_ext autoreload
%autoreload 2

import os
from textwrap import dedent

import numpy as np
import torch
from datasets import load_dataset
from sympy.parsing.sympy_parser import parse_expr
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.utils import is_flash_attn_2_available

import prompt
import utils
from utils import GSM8KParser

# Set the TOKENIZERS_PARALLELISM environment variable for this kernel process.
# NOTE(review): presumably read by the Hugging Face tokenizers backend to allow
# parallel tokenization — confirm this is the intended setting.
os.environ["TOKENIZERS_PARALLELISM"] = "true"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Dataset Exploration 

In [22]:
# Load the GSM8K "main" configuration once and take both splits from the same
# object instead of resolving the dataset twice.
gsm8k = load_dataset('gsm8k', 'main')
train_dataset = gsm8k['train']
val_dataset = gsm8k['test']  # the 'test' split serves as the validation set in this notebook

In [23]:
# Report the number of examples in each split (train first, then validation).
for split in (train_dataset, val_dataset):
    print(len(split))

7473
1319


In [24]:
# Spot-check five randomly chosen training examples.
# NOTE(review): the draw is unseeded, so a re-run shows different instances.
separator = "*" * 100
num_to_inspect = 5
for _ in range(num_to_inspect):
    seed = np.random.randint(0, len(train_dataset))  # despite the name, this is a row index
    print(separator)
    print(f"Checking instance {seed}:")
    utils.inspect_instance(train_dataset, seed)

****************************************************************************************************
Checking instance 598:
question
Federal guidelines recommend eating at least 2 cups of vegetables per day. From breakfast on Sunday to the end of the day on Thursday, Sarah has eaten 8 cups. How many cups per day does Sarah need to eat of her vegetables in order to meet her daily minimum requirement for the week?
answer
There are 7 days in a week and 2 cups are recommended per day, bringing the total to 7 days * 2 cups/day =<<7*2=14>>14 cups for the week.
Out of the 14 total cups needed for the week, Sarah has already eaten 8, for a total of 14 cups - 8 cups = <<14-8=6>>6 cups left to consume for the week.
Sunday through Thursday equals 5 days that have passed. If there are 7 days in the week then Sarah has 7 days - 5 days = <<7-5=2>>2 days left of the week.
If she needs to eat 6 more cups in the next 2 days then she will have to eat 6 cups / 2 days = <<6/2=3>>3 cups per day over the ne

In [39]:
# Enrich the training split in two passes: first the hop count, then the
# fields parsed from the ground-truth answer.
train_with_hops = train_dataset.map(lambda example: GSM8KParser.get_num_hops(example['answer']))
train_dataset = train_with_hops.map(lambda example: GSM8KParser.get_answer_from_gt(example['answer']))

# Run the prediction-side parser over the same reference text and keep only
# its 'answer_str_digit' column.
train_pred_parsed = train_dataset.map(lambda example: GSM8KParser.get_answer_from_pred(example['answer']))
answer_str_inf = train_pred_parsed['answer_str_digit']

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

In [40]:
# Sanity check: the prediction-side parser must produce exactly the same answer
# string as the ground-truth parser for every training example.
train_gt_answers = train_dataset['answer_str_digit']

# zip() stops at the shorter input, so a stale or truncated `answer_str_inf`
# would otherwise pass silently; compare the lengths first.
assert len(train_gt_answers) == len(answer_str_inf), (
    f"length mismatch: {len(train_gt_answers)} ground-truth vs {len(answer_str_inf)} parsed answers"
)
for row, (gt, inf) in enumerate(zip(train_gt_answers, answer_str_inf)):
    assert gt == inf, f"parser mismatch at train row {row}: ground truth {gt!r} != parsed {inf!r}"

In [41]:
# Enrich the validation split in two passes: first the hop count, then the
# fields parsed from the ground-truth answer.
val_with_hops = val_dataset.map(lambda example: GSM8KParser.get_num_hops(example['answer']))
val_dataset = val_with_hops.map(lambda example: GSM8KParser.get_answer_from_gt(example['answer']))

# Run the prediction-side parser over the same reference text and keep only
# its 'answer_str_digit' column. This overwrites the value computed for train.
val_pred_parsed = val_dataset.map(lambda example: GSM8KParser.get_answer_from_pred(example['answer']))
answer_str_inf = val_pred_parsed['answer_str_digit']

Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [42]:
# Sanity check: the prediction-side parser must produce exactly the same answer
# string as the ground-truth parser for every validation example.
val_gt_answers = val_dataset['answer_str_digit']

# zip() stops at the shorter input, so a stale or truncated `answer_str_inf`
# would otherwise pass silently; compare the lengths first.
assert len(val_gt_answers) == len(answer_str_inf), (
    f"length mismatch: {len(val_gt_answers)} ground-truth vs {len(answer_str_inf)} parsed answers"
)
for row, (gt, inf) in enumerate(zip(val_gt_answers, answer_str_inf)):
    assert gt == inf, f"parser mismatch at val row {row}: ground truth {gt!r} != parsed {inf!r}"

In [32]:
# NOTE(review): this cell looks stale. Its execution count (32) predates the
# cells above, it reads an 'answer_str_gt' column where the neighbouring checks
# use 'answer_str_digit', and on a top-to-bottom run `answer_str_inf` was last
# assigned from val_dataset rather than train_dataset. Confirm, then remove or
# update it.
for gt, inf in zip(train_dataset['answer_str_gt'], answer_str_inf):
    assert gt == inf

In [None]:
# Show which columns are available on the training split after the map() calls.
train_dataset.column_names

In [7]:
# Materialise the training examples as a list ordered from most to fewest hops.
sorted_data = list(train_dataset)
sorted_data.sort(key=lambda example: example['num_hops'], reverse=True)


In [8]:
# Display the first element of the descending hop-count sort, i.e. an example
# with the largest 'num_hops' value.
sorted_data[0]

{'question': 'You can buy 4 apples or 1 watermelon for the same price. You bought 36 fruits evenly split between oranges, apples and watermelons, and the price of 1 orange is $0.50. How much does 1 apple cost if your total bill was $66?',
 'answer': 'If 36 fruits were evenly split between 3 types of fruits, then I bought 36/3 = <<36/3=12>>12 units of each fruit\nIf 1 orange costs $0.50 then 12 oranges will cost $0.50 * 12 = $<<0.5*12=6>>6\nIf my total bill was $66 and I spent $6 on oranges then I spent $66 - $6 = $<<66-6=60>>60 on the other 2 fruit types.\nAssuming the price of watermelon is W, and knowing that you can buy 4 apples for the same price and that the price of one apple is A, then 1W=4A\nIf we know we bought 12 watermelons and 12 apples for $60, then we know that $60 = 12W + 12A\nKnowing that 1W=4A, then we can convert the above to $60 = 12(4A) + 12A\n$60 = 48A + 12A\n$60 = <<60=60>>60A\nThen we know the price of one apple (A) is $60/60= $<<60/60=1>>1\n#### 1',
 'num_hops':

In [9]:
# Answer validation: run the ground-truth parser over every reference answer in
# both splits. The result must unpack into exactly two values; a parse failure
# or a different arity raises here.
for split in (train_dataset, val_dataset):
    # Iterate the 'answer' column directly instead of indexing row by row.
    for answer in tqdm(split['answer'], total=len(split)):
        answer_str, answer_float = GSM8KParser.get_answer_from_gt(answer)

 21%|██▏       | 1594/7473 [00:00<00:00, 15934.13it/s]

100%|██████████| 7473/7473 [00:00<00:00, 15688.99it/s]
100%|██████████| 1319/1319 [00:00<00:00, 17317.67it/s]


In [10]:
# Show which Python type eval() yields for an integer-looking string and for a
# decimal-looking string.
for numeric_text in ("990", "990.00"):
    print(type(eval(numeric_text)))

<class 'int'>
<class 'float'>


# Rejection Sampling 

In [11]:
# Checkpoint used in the rejection-sampling section.
model_name = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Inspect the tokenizer's special tokens and its padding side.
for tokenizer_setting in (tokenizer.special_tokens_map, tokenizer.padding_side):
    print(tokenizer_setting)

{'bos_token': '<s>', 'eos_token': '<|endoftext|>', 'unk_token': '<unk>', 'pad_token': '<|endoftext|>'}
left


In [12]:
# Keyword arguments used to load the causal-LM weights.
model_load_kwargs = dict(
    # NOTE(review): lets code shipped with the checkpoint repository run locally;
    # confirm the source is trusted.
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
# Display the device the loaded model reports.
model.device

device(type='cuda', index=0)

In [14]:
# Build a two-turn conversation (system + user) for the example at position
# `idx` of the hop-sorted training data.
idx = 0
instance = sorted_data[idx]

system_turn = {"role": "system", "content": prompt.Template.system}
user_turn = {
    "role": "user",
    "content": prompt.Template.user.format(question=instance['question']),
}
chat = [system_turn, user_turn]


In [15]:
# Render the conversation into prompt text with the tokenizer's chat template.
# The input is a batch (a list of conversations), so more conversations can be
# appended to this list to render several prompts in one call.
chats = tokenizer.apply_chat_template(
    [chat],
    add_generation_prompt=True,  # end the prompt with the assistant header so generation starts there
    tokenize=False,              # return rendered strings rather than token ids
)
# Inspect the rendered batch itself (not the input `chat`).
print(type(chats), len(chats))
print(chats[0])

<class 'list'> 1
<|system|>
Human:
You are a helpful assistant that is exceptional at solving Grade School Math Problems.<|end|>
<|user|>
Task: Think step by step to derive the final answer for the following question.
Approach: Build intermediate steps which leads to the final answer, perform calculations when required.

```question
You can buy 4 apples or 1 watermelon for the same price. You bought 36 fruits evenly split between oranges, apples and watermelons, and the price of 1 orange is $0.50. How much does 1 apple cost if your total bill was $66?
```
After completing all steps, you must submit your final answer using only numbers and no other characters.
Write your final answer in a seperate new line preceeded by "####".<|end|>
<|assistant|>



In [16]:
#tokenizer.batch_encode_plus(chats, return_tensors='pt', padding='longest')["input_ids"]

In [22]:
# Check whether FlashAttention-2 is usable in this environment.
is_flash_attn_2_available()

False

In [18]:
# Put the model in evaluation mode, then draw completions for the rendered
# prompt(s) with the sampling settings below.
# NOTE(review): no random seed is set in this notebook, so the sampled text is
# presumably not reproducible across runs — confirm against utils.sample_answers.
model.eval()
sampling_config = {
    "max_new_tokens": 256,
    "temperature": 0.5,
    "num_samples": 10,
    "top_p": 0.85,
}
samples = utils.sample_answers(
    tokenizer=tokenizer,
    model=model,
    chats=chats,
    **sampling_config,
)

The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
You are not running the flash-attention implementation, expect numerical differences.


In [20]:
# Print how many completions came back from the sampling call.
print(len(samples))

10


In [21]:
# Concatenate all sampled completions, each followed by a 50-asterisk divider
# line, then echo the text and save it to disk.
divider = "*" * 50
rand_samples_base = ''.join(sample + '\n' + divider + '\n' for sample in samples)

print(rand_samples_base)
# The context manager closes the file on exit, so no explicit close() is needed.
# UTF-8 is requested explicitly so the output does not depend on the platform's
# default encoding.
with open("long_hop.txt", 'w', encoding="utf-8") as f:
    f.write(rand_samples_base)

Human:
You are a helpful assistant that is exceptional at solving Grade School Math Problems. Task: Think step by step to derive the final answer for the following question.
Approach: Build intermediate steps which leads to the final answer, perform calculations when required.

```question
You can buy 4 apples or 1 watermelon for the same price. You bought 36 fruits evenly split between oranges, apples and watermelons, and the price of 1 orange is $0.50. How much does 1 apple cost if your total bill was $66?
```
After completing all steps, you must submit your final answer using only numbers and no other characters.
Write your final answer in a seperate new line preceeded by "####". Step 1: Determine the number of each type of fruit.
Since there are 36 fruits evenly split between oranges, apples, and watermelons, we can divide 36 by 3 to find the number of each fruit type:
36 fruits / 3 = 12 fruits of each type

Step 2: Calculate the total cost of oranges.
We know that the price of 1 o