In [27]:
"""
Preprocess dataset for countdown task - given a target number and N numbers, generate equations to reach target
"""

import re
import os
from datasets import Dataset, load_dataset
import datasets
from random import randint, seed, choice
from typing import List, Tuple
from tqdm import tqdm
import argparse


def gen_dataset(
    num_samples: int,
    num_operands: int = 6,
    max_target: int = 1000,
    min_number: int = 1,
    max_number: int = 100,
    operations: List[str] = ['+', '-', '*', '/'],
    seed_value: int = 42,
) -> List[Tuple]:
    """Generate random (target, numbers) samples for the countdown task.

    The target and the operands are drawn independently; no solution is
    computed or checked here.

    Args:
        num_samples: Number of samples to generate
        num_operands: Number of numbers provided in each sample
        max_target: Maximum value for target number (targets are drawn from
            1..max_target inclusive)
        min_number: Minimum value for provided numbers (inclusive)
        max_number: Maximum value for provided numbers (inclusive)
        operations: List of allowed operations. Accepted for interface
            compatibility; this function does not read it.
        seed_value: Random seed for reproducibility

    Returns:
        List of tuples containing (target, numbers), where ``numbers`` is a
        list of ``num_operands`` integers.
    """
    # Local import so the block does not depend on the file-level import list.
    from random import Random

    # A private generator yields the same sequence as seeding the module-level
    # functions with `seed_value`, but leaves the process-wide random state
    # untouched for any other code that relies on it.
    rng = Random(seed_value)
    samples = []

    for _ in tqdm(range(num_samples)):
        # Draw order (target first, then operands) determines the exact
        # sample stream for a given seed, so keep it stable.
        target = rng.randint(1, max_target)
        numbers = [rng.randint(min_number, max_number) for _ in range(num_operands)]
        samples.append((target, numbers))

    return samples

def make_prefix(dp, template_type):
    """Build the prompt prefix for one countdown example.

    Args:
        dp: Mapping with a 'target' entry and a 'nums' entry; both are
            interpolated into the prompt text as-is.
        template_type: Either 'base' (plain User/Assistant transcript) or
            'qwen-instruct' (text wrapped in <|im_start|>/<|im_end|> markers).

    Returns:
        The prompt string, ending with an opening <think> tag.

    Raises:
        ValueError: If ``template_type`` is not one of the supported names.
    """
    target = dp['target']
    numbers = dp['nums']
    # NOTE: also need to change reward_score/countdown.py
    if template_type == 'base':
        # This works for any base model
        prefix = f"""A conversation between User and Assistant. The user asks a question, and the Assistant solves it. The assistant first thinks about the reasoning process in the mind and then provides the user with the answer.
User: Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.
Assistant: Let me solve this step by step.
<think>"""
    elif template_type == 'qwen-instruct':
        # This works for Qwen Instruct Models
        prefix = f"""<|im_start|>system\nYou are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.<|im_end|>\n<|im_start|>user\n Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.<|im_end|>\n<|im_start|>assistant\nLet me solve this step by step.\n<think>"""
    else:
        # Without this branch `prefix` would be unbound and the return below
        # would fail with an UnboundLocalError that hides the real cause.
        raise ValueError(
            f"Unknown template_type {template_type!r}; expected 'base' or 'qwen-instruct'"
        )
    return prefix


if __name__ == '__main__':
    # Label copied into the "data_source" field of every record.
    data_source = 'countdown'

    # Fetch the train split of the countdown tasks dataset.
    raw_dataset = load_dataset('Jiayi-Pan/Countdown-Tasks-3to4', split='train')

    def make_map_fn(split):
        """Return a per-example mapper that records ``split`` in ``extra_info``."""

        def process_fn(example, idx):
            """Convert one example (with its index) into a prompt/reward record."""
            ground_truth = {
                "target": example['target'],
                "numbers": example['nums'],
            }
            # The whole templated prompt is carried as a single "user" turn.
            chat_turn = {
                "role": "user",
                "content": make_prefix(example, template_type="qwen-instruct"),
            }
            return {
                "data_source": data_source,
                "prompt": [chat_turn],
                "ability": "math",
                "reward_model": {"style": "rule", "ground_truth": ground_truth},
                "extra_info": {'split': split, 'index': idx},
            }

        return process_fn

    # Map every example, then carve out a 10% test split with a fixed seed.
    raw_dataset = raw_dataset.map(
        make_map_fn('train'),
        with_indices=True,
    ).train_test_split(test_size=0.1, seed=42)

    # Persist the resulting splits under a directory relative to the CWD.
    raw_dataset.save_to_disk("combination_task")

Map: 100%|██████████| 490364/490364 [00:08<00:00, 57499.83 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 441327/441327 [00:03<00:00, 133137.47 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 49037/49037 [00:00<00:00, 152650.49 examples/s]


In [28]:
import datasets
# Read back whatever is stored under the relative path "combination_task".
dataset = datasets.load_from_disk("combination_task")

In [29]:
# Narrow `dataset` to its 'train' split. The guard makes the cell safe to
# re-run: once `dataset` has been rebound to a single split, indexing it with
# 'train' again would be treated as a column lookup instead of a split lookup.
if isinstance(dataset, datasets.DatasetDict):
    dataset = dataset['train']

In [30]:
# Show the first record of `dataset` as the cell output.
dataset[0]

{'target': 38,
 'nums': [71, 80, 97, 4],
 'data_source': 'countdown',
 'prompt': [{'content': '<|im_start|>system\nYou are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.<|im_end|>\n<|im_start|>user\n Using the numbers [71, 80, 97, 4], create an equation that equals 38. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.<|im_end|>\n<|im_start|>assistant\nLet me solve this step by step.\n<think>',
   'role': 'user'}],
 'ability': 'math',
 'reward_model': {'ground_truth': {'numbers': [71, 80, 97, 4], 'target': 38},
  'style': 'rule'},
 'extra_info': {'index': 272711, 'split': 'train'}}

In [None]:
def map_fn(example):
    """Convert one countdown example into a chat prompt plus its ground truth.

    Args:
        example: Mapping with a 'nums' entry and a 'target' entry; an
            'answer' entry is used as the ground truth when present.

    Returns:
        Dict with:
            "prompt": a two-message list (system message, then the user task).
            "ground_truth": ``example['answer']`` if that key exists,
                otherwise ``{"target": ..., "numbers": ...}`` built from the
                example.
    """
    prompt = [{"role": "system", "content": "You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer"}, {"role": "user", "content": f"Using the numbers {example['nums']}, create an equation that equals {example['target']}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>."}]
    # The records shown in this notebook expose only 'target' and 'nums', so an
    # unconditional example['answer'] raises KeyError. Fall back to the same
    # target/numbers pair the other preprocessing path stores as ground truth.
    if 'answer' in example:
        ground_truth = example['answer']
    else:
        ground_truth = {"target": example['target'], "numbers": example['nums']}
    return {
        "prompt": prompt,
        "ground_truth": ground_truth
    }
# Load the 'train' split of 'Jiayi-Pan/Countdown-Tasks-3to4' and bind it to `raw_dataset`.
raw_dataset = load_dataset('Jiayi-Pan/Countdown-Tasks-3to4', split='train')

In [31]:
# Rebind `raw_dataset` to the 'train' split of 'Jiayi-Pan/Countdown-Tasks-3to4'.
raw_dataset = load_dataset('Jiayi-Pan/Countdown-Tasks-3to4', split='train')