In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import torch
import json
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Step 1: Load the tokenizer and model with quantization
model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # Near 3B model (smallest available Qwen model)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device,
    trust_remote_code=True
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [3]:
# Step 2: Load the google/IFEval dataset
dataset = load_dataset("google/IFEval")

Downloading readme: 100%|██████████| 5.52k/5.52k [00:00<?, ?B/s]
Downloading data: 100%|██████████| 207k/207k [00:00<00:00, 367kB/s]
Generating train split: 100%|██████████| 541/541 [00:00<00:00, 7820.88 examples/s]


In [4]:
# Step 3: Generate predictions on the dataset
output_file = "model_responses.jsonl"
with open(output_file, 'w', encoding='utf-8') as f_out:
    for sample in tqdm(dataset['train']):   # Use 'validation' or 'train' split if 'test' is not available
        input_text = sample['prompt']  # Adjust the field name based on the dataset's structure

        # Prepare the input prompt
        prompt = input_text

        # Tokenize input
        inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)

        # Generate output
        outputs = model.generate(
            inputs,
            max_length=256,
            eos_token_id=tokenizer.eos_token_id,
        )

        # Decode output
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Since the model may include the prompt in its output, we extract the generated response
        response = generated_text[len(prompt):]

        # Prepare the JSON object
        json_obj = {
            "prompt": prompt,
            "response": response
        }

        # Write the JSON object to file
        f_out.write(json.dumps(json_obj) + '\n')

 66%|██████▌   | 358/541 [1:01:40<31:31, 10.34s/it]


ValueError: Input length of input_ids is 361, but `max_length` is set to 256. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.