In [9]:
import os, io
import json
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pyarrow.parquet as pq
import pandas as pd
import glob
import tqdm
from torchvision import transforms

os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

# set gpu id
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

### Data Loading Class for GSM8K

In [10]:
class GSM8KDataset(Dataset):
    def __init__(self, split):
        root = "datasets/grade_school_math/data"
        file_path = os.path.join(root, f"{split}.jsonl")
        with open(file_path, 'r') as f:
            self.data = [json.loads(line) for line in f]
        print(f"Loaded {len(self.data)} examples.")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        return {
            "question": item["question"],
            "answer": item["answer"]
        }

def gsm8k_dataloader(batch_size=32, split="train"):
    dataset = GSM8KDataset(split)
    return DataLoader(dataset, batch_size=batch_size, shuffle=(split == "train"))

### Qwen2-1.5B Inference

In [11]:
import os, json
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

logging.getLogger("transformers").setLevel(logging.ERROR)

In [12]:
def load_qwen_model():
    model_path = "checkpoints/Qwen2-1.5B"
    print(f"Attempting to load model from local path: {model_path}")
    
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True).to(device)

    except Exception as e:
        print(f"Error loading model or tokenizer: {e}")
        raise
    
    return model, tokenizer, device

def llm_inference():
    model, tokenizer, device = load_qwen_model()
    dataloader = gsm8k_dataloader(batch_size=32, split="test")
    total_samples = len(dataloader)
    
    results = []
    process_single_sample = False
    
    for i, batch in enumerate(tqdm.tqdm(dataloader)):        
        question = batch["question"][0]
        print(f"  Question: {question[:50]}...")
        
        prompt = f"Question: {question}\nAnswer:"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        
        try:
            with torch.no_grad():
                outputs = model.generate(**inputs, max_new_tokens=50, max_time=30,
                                         num_return_sequences=1, do_sample=True, 
                                         temperature=0.7, top_p=0.95,
                                         pad_token_id=tokenizer.eos_token_id)
            
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            answer = generated_text.split("Answer:")[-1].strip()
            print(f"  Answer generated: {answer[:50]}...")
        except Exception as e:
            print(f"  Error generating answer: {str(e)}")
            answer = "Error: Failed to generate answer"
        
        results.append({
            "question": question,
            "generated answer": answer,
            "ground truth": batch["answer"][0]
        })
        
        torch.cuda.empty_cache()  
        
        if process_single_sample:
            break 
    
    return results

In [5]:
results = llm_inference()
print(f"Processed {len(results)} questions from GSM8K dataset")

# Save results to a json file
output_file = "results/llm_inference_results.json"
if not os.path.exists("results"):
    os.makedirs("results")

with open(output_file, 'w') as f:
    json.dump(results, f, indent=4)
print(f"Results saved to {output_file}")

Attempting to load model from local path: checkpoints/Qwen2-1.5B
Loaded 1319 examples.


  0%|          | 0/42 [00:00<?, ?it/s]

  Question: Janet’s ducks lay 16 eggs per day. She eats three ...


  2%|▏         | 1/42 [00:02<01:28,  2.15s/it]

  Answer generated: ```python
def daily_earnings():
    """Janet’s duc...
  Question: John takes care of 10 dogs.  Each dog takes .5 hou...


  5%|▍         | 2/42 [00:03<00:58,  1.46s/it]

  Answer generated: ```python
def total_hours_spent():
    """John tak...
  Question: Sophia is thinking of taking a road trip in her ca...


  7%|▋         | 3/42 [00:04<00:48,  1.25s/it]

  Answer generated: ```python
def miles_per_tank():
    """Sophia is t...
  Question: Harry slept 9 hours last night. His friend James s...


 10%|▉         | 4/42 [00:05<00:43,  1.14s/it]

  Answer generated: ```python
from sympy import *

# Harry's sleep tim...
  Question: Well's mother sells watermelons, peppers, and oran...


 12%|█▏        | 5/42 [00:06<00:40,  1.09s/it]

  Answer generated: ```python
def total_cost():
    """Well's mother s...
  Question: A tank of water has a depth of 17 feet on Monday. ...


 14%|█▍        | 6/42 [00:07<00:37,  1.05s/it]

  Answer generated: ```python
from sympy import *

# Define the variab...
  Question: Tom plants 10 trees a year.  Every year he also ch...


 17%|█▋        | 7/42 [00:08<00:36,  1.03s/it]

  Answer generated: ```python
initial_number_of_trees = 50
trees_plant...
  Question: Carlos and Benji are at the beach. Carlos rents a ...


 19%|█▉        | 8/42 [00:09<00:34,  1.01s/it]

  Answer generated: ```python
def total_rental_cost():
    """Carlos a...
  Question: Brady is counting the money in his piggy bank. He ...


 21%|██▏       | 9/42 [00:10<00:33,  1.00s/it]

  Answer generated: ```python
pennies = 100
nickels = 40
dimes = 20
do...
  Question: A landscaping company is delivering flagstones to ...


 24%|██▍       | 10/42 [00:11<00:31,  1.00it/s]

  Answer generated: The total weight of 80 flagstones is 80*75 = 6000 ...
  Question: At the beginning of the party, there were 25 men a...


 26%|██▌       | 11/42 [00:11<00:30,  1.01it/s]

  Answer generated: ```python
from sympy import *

# Define the initia...
  Question: After Andrea saved some money, she then spent the ...


 29%|██▊       | 12/42 [00:12<00:29,  1.01it/s]

  Answer generated: ```python
def money_saved():
    """After Andrea s...
  Question: Mr. Ruther sold 3/5 of his land and had 12.8 hecta...


 31%|███       | 13/42 [00:13<00:28,  1.01it/s]

  Answer generated: Mr. Ruther sold 3/5 of his land, so he had 1 - 3/5...
  Question: A herd consists of camels and dromedaries. There a...


 33%|███▎      | 14/42 [00:14<00:27,  1.02it/s]

  Answer generated: ```python
from sympy import symbols, Eq, solve

# ...
  Question: A salesman bought a case of 48 sneakers for $576. ...


 36%|███▌      | 15/42 [00:15<00:26,  1.02it/s]

  Answer generated: ```python
# profit is total revenue minus total co...
  Question: Ben has 4 tubes of blue paint and 3 tubes of yello...


 38%|███▊      | 16/42 [00:16<00:25,  1.02it/s]

  Answer generated: ```python
def jasper_paint_tubes():
    """Ben has...
  Question: Hannah needs to drink 60 ml of water for each kilo...


 40%|████      | 17/42 [00:17<00:24,  1.02it/s]

  Answer generated: Each lap is 0.25 km, so 8 laps are 8*0.25 = 2 km.
...
  Question: Jeff and Brad are trying to divide 100 dollars bet...


 43%|████▎     | 18/42 [00:18<00:23,  1.02it/s]

  Answer generated: Let's solve this problem using Python's sympy libr...
  Question: Michael is replacing the carpet in his bedroom.  T...


 45%|████▌     | 19/42 [00:19<00:22,  1.02it/s]

  Answer generated: The cost for removing the old carpet is $4 per squ...
  Question: Joe has $50 to buy an outfit for his new field tri...


 48%|████▊     | 20/42 [00:20<00:21,  1.02it/s]

  Answer generated: ```python
from sympy import *

# Define the variab...
  Question: Bill is ordering a new truck. He has decided to pu...


 50%|█████     | 21/42 [00:21<00:20,  1.01it/s]

  Answer generated: ```python
# base price
base_price = 30000
# king c...
  Question: Christina records her mood every day on a calendar...


 52%|█████▏    | 22/42 [00:22<00:19,  1.01it/s]

  Answer generated: ```python
from sympy import *

# Define the number...
  Question: Gerald and Julia divided $100 in the ratio 3:2. If...


 55%|█████▍    | 23/42 [00:23<00:18,  1.02it/s]

  Answer generated: ```python
def money_left():
    """Gerald and Juli...
  Question: Boris has 100 apples. Beck has 23 fewer apples tha...


 57%|█████▋    | 24/42 [00:24<00:17,  1.02it/s]

  Answer generated: ```python
def apple_difference():
    """Boris has...
  Question: In one hour, Ezra read twice as many books as Ahme...


 60%|█████▉    | 25/42 [00:25<00:16,  1.02it/s]

  Answer generated: ```python
from sympy import *

# Define the variab...
  Question: Pierson scored 278 points in one game of bowling. ...


 62%|██████▏   | 26/42 [00:26<00:15,  1.02it/s]

  Answer generated: ```python
def total_points():
    """Pierson score...
  Question: Roy has saved 40% more in money earned by chores t...


 64%|██████▍   | 27/42 [00:27<00:14,  1.02it/s]

  Answer generated: ```python
def money_saved_by_roy():
    """Roy has...
  Question: George, a grade six teacher, ordered 600 burritos ...


 67%|██████▋   | 28/42 [00:28<00:13,  1.02it/s]

  Answer generated: ```python
# 600 burritos for 50 students
number_of...
  Question: John rents his car out 10 times a month for 3 hour...


 69%|██████▉   | 29/42 [00:29<00:12,  1.02it/s]

  Answer generated: ```python
def calculate_profit():
    """John rent...
  Question: Rita hand-picks Junebugs off of her plants every s...


 71%|███████▏  | 30/42 [00:30<00:11,  1.02it/s]

  Answer generated: ```python
junebugs_removed_on_monday = 39
junebugs...
  Question: A pirate crew is digging for buried treasure on th...


 74%|███████▍  | 31/42 [00:31<00:10,  1.02it/s]

  Answer generated: ```python
def holes_dug_on_fourth_day():
    """A ...
  Question: Buford writes many checks every year.  Once per mo...


 76%|███████▌  | 32/42 [00:32<00:09,  1.02it/s]

  Answer generated: ```python
def total_checks():
    """Buford writes...
  Question: Suzanne sold 80 cookies for $1 each and 60 cupcake...


 79%|███████▊  | 33/42 [00:33<00:08,  1.02it/s]

  Answer generated: ```python
number_of_cookies = 80
price_per_cookie ...
  Question: Greg puts clean sheets on 4 twin beds and 1 king s...


 81%|████████  | 34/42 [00:34<00:07,  1.02it/s]

  Answer generated: ```python
def total_laundry_loads():
    """Greg p...
  Question: There are 6 periods in the day for a normal studen...


 83%|████████▎ | 35/42 [00:35<00:06,  1.02it/s]

  Answer generated: ```python
def total_learning_time():
    """There ...
  Question: John visits his parents twice a month.  It takes h...


 86%|████████▌ | 36/42 [00:36<00:05,  1.02it/s]

  Answer generated: ```python
def miles_driven():
    """John visits h...
  Question: Mark buys one lottery ticket with a 20% chance of ...


 88%|████████▊ | 37/42 [00:37<00:04,  1.02it/s]

  Answer generated: The probability of Mark winning the first ticket i...
  Question: To heat during the winter, Ali ordered 850 kilos o...


 90%|█████████ | 38/42 [00:38<00:03,  1.02it/s]

  Answer generated: Ali ordered 850 kilos of coal, and each bag contai...
  Question: A bus has a capacity of 200 people. When it depart...


 93%|█████████▎| 39/42 [00:39<00:02,  1.02it/s]

  Answer generated: ```python
def remaining_capacity():
    """A bus h...
  Question: Jason works as a salesperson at a car dealership. ...


 95%|█████████▌| 40/42 [00:40<00:01,  1.01it/s]

  Answer generated: ```python
def telephone_calls_needed():
    """Jas...
  Question: Steve put together a puzzle that took 10 hours of ...


 98%|█████████▊| 41/42 [00:41<00:00,  1.02it/s]

  Answer generated: ```python
from sympy import *

# Define the variab...
  Question: Tom's restaurant gets 6 reservations a night.  The...


100%|██████████| 42/42 [00:42<00:00,  1.01s/it]

  Answer generated: ```python
# each reservation gets 2 meals
meals_pe...
Processed 42 questions from GSM8K dataset
Results saved to results/llm_inference_results.json





### Finetune

In [18]:
import transformers
import os
from transformers import AdamW, get_linear_schedule_with_warmup

In [19]:
def finetune_model():
    # 加载基础模型
    model, tokenizer, device = load_qwen_model()
    
    # 检查pad_token是否存在
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # 自定义微调数据集类
    class FinetuneGSM8KDataset(Dataset):
        def __init__(self, split, tokenizer, max_length=512):
            root = "datasets/grade_school_math/data"
            file_path = os.path.join(root, f"{split}.jsonl")
            with open(file_path, 'r') as f:
                self.data = [json.loads(line) for line in f]
            self.tokenizer = tokenizer
            self.max_length = max_length

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            item = self.data[idx]
            question = item["question"]
            answer = item["answer"]
            
            # 构造模型输入格式
            prompt = f"Question: {question}\nAnswer: {answer}"
            inputs = self.tokenizer(
                prompt,
                max_length=self.max_length,
                padding=False,
                truncation=True,
                return_tensors=None
            )
            
            # 分离输入和标签（问答分离）
            input_ids = inputs["input_ids"]
            # 找到Answer:的位置来分割输入输出
            answer_prefix = f"\nAnswer: "
            prompt_text = f"Question: {question}{answer_prefix}"
            prompt_len = len(self.tokenizer.encode(prompt_text, add_special_tokens=False))
            
            # 设置标签（只计算答案部分的loss）
            labels = [-100]*prompt_len + input_ids[prompt_len:]
            
            return {
                "input_ids": input_ids,
                "attention_mask": [1]*len(input_ids),
                "labels": labels
            }

    # 动态填充collate函数
    def collate_fn(batch):
        pad_token_id = tokenizer.pad_token_id
        max_length = max(len(item["input_ids"]) for item in batch)
        
        padded_batch = {
            "input_ids": [],
            "attention_mask": [],
            "labels": []
        }
        
        for item in batch:
            pad_len = max_length - len(item["input_ids"])
            padded_batch["input_ids"].append(
                item["input_ids"] + [pad_token_id]*pad_len
            )
            padded_batch["attention_mask"].append(
                item["attention_mask"] + [0]*pad_len
            )
            padded_batch["labels"].append(
                item["labels"] + [-100]*pad_len
            )
        
        # 转换为tensor并移到设备
        for key in padded_batch:
            padded_batch[key] = torch.tensor(padded_batch[key], dtype=torch.long).to(device)
            
        return padded_batch

    # 创建数据加载器
    train_dataset = FinetuneGSM8KDataset("train", tokenizer)
    train_loader = DataLoader(
        train_dataset,
        batch_size=4,  # 根据GPU显存调整
        shuffle=True,
        collate_fn=collate_fn
    )

    # 训练配置
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
    num_epochs = 3
    total_steps = len(train_loader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=int(0.1*total_steps),
        num_training_steps=total_steps
    )

    # 训练循环
    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0
        progress_bar = tqdm.tqdm(train_loader, desc=f"Epoch {epoch+1}")
        
        for batch in progress_bar:
            optimizer.zero_grad()
            outputs = model(**batch)
            loss = outputs.loss
            loss.backward()
            
            # 梯度裁剪
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            
            optimizer.step()
            scheduler.step()
            
            epoch_loss += loss.item()
            progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
        
        # 打印epoch统计信息
        avg_loss = epoch_loss / len(train_loader)
        print(f"Epoch {epoch+1} completed. Average Loss: {avg_loss:.4f}")

    # 保存微调后的模型
    finetuned_path = "checkpoints/Qwen2-1.5B-finetuned"
    model.save_pretrained(finetuned_path)
    tokenizer.save_pretrained(finetuned_path)
    print(f"Fine-tuned model saved to {finetuned_path}")

In [20]:
# 修改后的推理函数（加载微调后的模型）
def llm_inference_after_finetune():
    # 加载微调后的模型
    finetuned_path = "checkpoints/Qwen2-1.5B-finetuned"
    print(f"Loading fine-tuned model from {finetuned_path}")
    
    tokenizer = AutoTokenizer.from_pretrained(finetuned_path, trust_remote_code=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AutoModelForCausalLM.from_pretrained(finetuned_path, trust_remote_code=True).to(device)
    
    dataloader = gsm8k_dataloader(batch_size=32, split="test")
    total_samples = len(dataloader)
    
    results = []
    process_single_sample = False
    
    for i, batch in enumerate(tqdm.tqdm(dataloader)):        
        question = batch["question"][0]
        print(f"  Question: {question[:50]}...")
        
        prompt = f"Question: {question}\nAnswer:"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        
        try:
            with torch.no_grad():
                outputs = model.generate(**inputs, max_new_tokens=50, max_time=30,
                                         num_return_sequences=1, do_sample=True, 
                                         temperature=0.7, top_p=0.95,
                                         pad_token_id=tokenizer.eos_token_id)
            
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            answer = generated_text.split("Answer:")[-1].strip()
            print(f"  Answer generated: {answer[:50]}...")
        except Exception as e:
            print(f"  Error generating answer: {str(e)}")
            answer = "Error: Failed to generate answer"
        
        results.append({
            "question": question,
            "generated answer": answer,
            "ground truth": batch["answer"][0]
        })
        
        torch.cuda.empty_cache()  
        
        if process_single_sample:
            break 
    
    return results

In [22]:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# 执行微调
finetune_model()

# 使用微调后的模型进行推理
results = llm_inference()

# 保存结果
output_file = "results/finetuned_llm_results.json"
with open(output_file, 'w') as f:
    json.dump(results, f, indent=4)
print(f"Inference results saved to {output_file}")

Attempting to load model from local path: checkpoints/Qwen2-1.5B


Epoch 1: 100%|██████████| 1869/1869 [22:37<00:00,  1.38it/s, loss=0.2774]


Epoch 1 completed. Average Loss: 0.4077


Epoch 2: 100%|██████████| 1869/1869 [22:36<00:00,  1.38it/s, loss=0.0822]


Epoch 2 completed. Average Loss: 0.2379


Epoch 3: 100%|██████████| 1869/1869 [22:44<00:00,  1.37it/s, loss=0.1079]


Epoch 3 completed. Average Loss: 0.0695
Fine-tuned model saved to checkpoints/Qwen2-1.5B-finetuned
Attempting to load model from local path: checkpoints/Qwen2-1.5B
Loaded 1319 examples.


  0%|          | 0/42 [00:00<?, ?it/s]

  Question: Janet’s ducks lay 16 eggs per day. She eats three ...


  2%|▏         | 1/42 [00:01<00:51,  1.27s/it]

  Answer generated: Janet has 16-3 = 13 eggs left after breakfast.
She...
  Question: John takes care of 10 dogs.  Each dog takes .5 hou...


  5%|▍         | 2/42 [00:02<00:43,  1.09s/it]

  Answer generated: Each dog takes 0.5 hours a day to walk and take ca...
  Question: Sophia is thinking of taking a road trip in her ca...


  7%|▋         | 3/42 [00:03<00:40,  1.04s/it]

  Answer generated: ```python
def miles_per_tank():
    """Sophia is t...
  Question: Harry slept 9 hours last night. His friend James s...


 10%|▉         | 4/42 [00:04<00:38,  1.01s/it]

  Answer generated: ```python
def sleep_difference():
    """Harry sle...
  Question: Well's mother sells watermelons, peppers, and oran...


 12%|█▏        | 5/42 [00:05<00:36,  1.01it/s]

  Answer generated: ```python
def total_spent():
    """Well's mother ...
  Question: A tank of water has a depth of 17 feet on Monday. ...


 14%|█▍        | 6/42 [00:06<00:35,  1.02it/s]

  Answer generated: ```python
def water_depth_on_wednesday():
    """A...
  Question: Tom plants 10 trees a year.  Every year he also ch...


 17%|█▋        | 7/42 [00:07<00:34,  1.02it/s]

  Answer generated: ```python
def remaining_trees():
    """Tom plants...
  Question: Carlos and Benji are at the beach. Carlos rents a ...


 19%|█▉        | 8/42 [00:08<00:33,  1.03it/s]

  Answer generated: ```python
from sympy import *

# Define the variab...
  Question: Brady is counting the money in his piggy bank. He ...


 21%|██▏       | 9/42 [00:08<00:32,  1.03it/s]

  Answer generated: ```python
pennies = 100
nickels = 40
dimes = 20
do...
  Question: A landscaping company is delivering flagstones to ...


 24%|██▍       | 10/42 [00:09<00:31,  1.03it/s]

  Answer generated: ```python
def trucks_needed():
    """A landscapin...
  Question: At the beginning of the party, there were 25 men a...


 26%|██▌       | 11/42 [00:10<00:29,  1.03it/s]

  Answer generated: ```python
from sympy import *

# Define the initia...
  Question: After Andrea saved some money, she then spent the ...


 29%|██▊       | 12/42 [00:11<00:28,  1.03it/s]

  Answer generated: ```python
def money_saved():
    """After Andrea s...
  Question: Mr. Ruther sold 3/5 of his land and had 12.8 hecta...


 31%|███       | 13/42 [00:12<00:27,  1.04it/s]

  Answer generated: Mr. Ruther sold 3/5 of his land, which is 12.8 hec...
  Question: A herd consists of camels and dromedaries. There a...


 33%|███▎      | 14/42 [00:13<00:27,  1.03it/s]

  Answer generated: ```python
def num_dromedaries():
    """A herd con...
  Question: A salesman bought a case of 48 sneakers for $576. ...


 36%|███▌      | 15/42 [00:14<00:26,  1.03it/s]

  Answer generated: ```python
def calculate_profit():
    """A salesma...
  Question: Ben has 4 tubes of blue paint and 3 tubes of yello...


 38%|███▊      | 16/42 [00:15<00:25,  1.04it/s]

  Answer generated: ```python
from sympy import *

# Define the number...
  Question: Hannah needs to drink 60 ml of water for each kilo...


 40%|████      | 17/42 [00:16<00:24,  1.04it/s]

  Answer generated: ```python
def water_needed():
    """Hannah needs ...
  Question: Jeff and Brad are trying to divide 100 dollars bet...


 43%|████▎     | 18/42 [00:17<00:23,  1.04it/s]

  Answer generated: Let's assume Brad gets $x$ dollars.
Then Jeff gets...
  Question: Michael is replacing the carpet in his bedroom.  T...


 45%|████▌     | 19/42 [00:18<00:22,  1.04it/s]

  Answer generated: ```python
# carpet cost
carpet_cost = 12
# padding...
  Question: Joe has $50 to buy an outfit for his new field tri...


 48%|████▊     | 20/42 [00:19<00:21,  1.04it/s]

  Answer generated: ```python
from sympy import *

# Define the variab...
  Question: Bill is ordering a new truck. He has decided to pu...


 50%|█████     | 21/42 [00:20<00:20,  1.03it/s]

  Answer generated: ```python
def total_truck_cost():
    """Bill is o...
  Question: Christina records her mood every day on a calendar...


 52%|█████▏    | 22/42 [00:21<00:19,  1.03it/s]

  Answer generated: Christina had 12 good days, 8 bad days, and 8 neut...
  Question: Gerald and Julia divided $100 in the ratio 3:2. If...


 55%|█████▍    | 23/42 [00:22<00:18,  1.04it/s]

  Answer generated: The ratio of Gerald's share to Julia's share is 3:...
  Question: Boris has 100 apples. Beck has 23 fewer apples tha...


 57%|█████▋    | 24/42 [00:23<00:17,  1.04it/s]

  Answer generated: ```python
def apple_difference():
    """Boris has...
  Question: In one hour, Ezra read twice as many books as Ahme...


 60%|█████▉    | 25/42 [00:24<00:16,  1.04it/s]

  Answer generated: Ahmed read 300/2 = 150 books in one hour.
Ezra has...
  Question: Pierson scored 278 points in one game of bowling. ...


 62%|██████▏   | 26/42 [00:25<00:15,  1.04it/s]

  Answer generated: ```python
def total_points():
    """Pierson score...
  Question: Roy has saved 40% more in money earned by chores t...


 64%|██████▍   | 27/42 [00:26<00:14,  1.04it/s]

  Answer generated: Roy saved 40% more than Anthony, so Roy saved $10....
  Question: George, a grade six teacher, ordered 600 burritos ...


 67%|██████▋   | 28/42 [00:27<00:13,  1.04it/s]

  Answer generated: ```python
from sympy import *

# Define the variab...
  Question: John rents his car out 10 times a month for 3 hour...


 69%|██████▉   | 29/42 [00:28<00:12,  1.04it/s]

  Answer generated: ```python
# number of hours rented
rented_hours = ...
  Question: Rita hand-picks Junebugs off of her plants every s...


 71%|███████▏  | 30/42 [00:29<00:11,  1.04it/s]

  Answer generated: ```python
junebug_count_monday = 39
junebug_count_...
  Question: A pirate crew is digging for buried treasure on th...


 74%|███████▍  | 31/42 [00:30<00:10,  1.03it/s]

  Answer generated: ```python
def holes_dug_fourth_day():
    """A pir...
  Question: Buford writes many checks every year.  Once per mo...


 76%|███████▌  | 32/42 [00:31<00:09,  1.03it/s]

  Answer generated: There are 12 months in a year.
Buford writes a che...
  Question: Suzanne sold 80 cookies for $1 each and 60 cupcake...


 79%|███████▊  | 33/42 [00:32<00:08,  1.04it/s]

  Answer generated: ```python
from sympy import *

# Define the variab...
  Question: Greg puts clean sheets on 4 twin beds and 1 king s...


 81%|████████  | 34/42 [00:33<00:07,  1.04it/s]

  Answer generated: ```python
# number of sheets to be washed per week...
  Question: There are 6 periods in the day for a normal studen...


 83%|████████▎ | 35/42 [00:34<00:06,  1.04it/s]

  Answer generated: ```python
def learning_time():
    """There are 6 ...
  Question: John visits his parents twice a month.  It takes h...


 86%|████████▌ | 36/42 [00:35<00:05,  1.04it/s]

  Answer generated: ```python
def total_miles():
    """John visits hi...
  Question: Mark buys one lottery ticket with a 20% chance of ...


 88%|████████▊ | 37/42 [00:35<00:04,  1.04it/s]

  Answer generated: ```python
from sympy import Rational

# Probabilit...
  Question: To heat during the winter, Ali ordered 850 kilos o...


 90%|█████████ | 38/42 [00:36<00:03,  1.04it/s]

  Answer generated: ```python
def cost_of_order():
    """To heat duri...
  Question: A bus has a capacity of 200 people. When it depart...


 93%|█████████▎| 39/42 [00:37<00:02,  1.04it/s]

  Answer generated: ```python
def remaining_capacity():
    """A bus h...
  Question: Jason works as a salesperson at a car dealership. ...


 95%|█████████▌| 40/42 [00:38<00:01,  1.03it/s]

  Answer generated: ```python
def calculate_calls():
    """Jason work...
  Question: Steve put together a puzzle that took 10 hours of ...


 98%|█████████▊| 41/42 [00:39<00:00,  1.04it/s]

  Answer generated: ```python
from sympy import *

# Define the variab...
  Question: Tom's restaurant gets 6 reservations a night.  The...


100%|██████████| 42/42 [00:40<00:00,  1.03it/s]

  Answer generated: ```python
# Tom's restaurant gets 6 reservations a...
Inference results saved to results/finetuned_llm_results.json



