In [17]:
from datasets import load_dataset, Dataset, DatasetDict
from pprint import pprint

# Data Split

In [None]:
ds = load_dataset("openbmb/UltraFeedback", split="train")
ds[0]

In [8]:
dd = ds.train_test_split(test_size=1000, seed=42)

In [13]:
dd.push_to_hub("heegyu/UltraFeedback-split")

Creating parquet from Arrow format: 100%|██████████| 32/32 [00:05<00:00,  6.09ba/s]
Creating parquet from Arrow format: 100%|██████████| 32/32 [00:05<00:00,  6.10ba/s]s/it]
Pushing dataset shards to the dataset hub: 100%|██████████| 2/2 [00:20<00:00, 10.13s/it]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00,  6.05ba/s]
Pushing dataset shards to the dataset hub: 100%|██████████| 1/1 [00:00<00:00,  1.24it/s]


# Max-margin dataset

In [15]:
new_dd = DatasetDict()
for k in dd:
    ds = dd[k]
    items = []

    for item in ds:
        completions = sorted(item["completions"], key=lambda x: x["overall_score"], reverse=True)
        if len(completions) < 2:
            continue
        
        chosen, rejected = completions[0], completions[-1]

        items.append({
            "instruction": item["instruction"],
            "chosen": chosen["response"],
            "chosen_critique": chosen["critique"],
            "chosen_score": chosen["overall_score"],
            "rejected": rejected["response"],
            "rejected_critique": rejected["critique"],
            "rejected_score": rejected["overall_score"],
        })

    new_dd[k] = Dataset.from_list(items)

In [19]:
print(new_dd)
# pprint(new_dd['train'][0])
new_dd.push_to_hub("heegyu/Ultrafeedback-split-dpo-max-margin")

DatasetDict({
    train: Dataset({
        features: ['instruction', 'chosen', 'chosen_critique', 'chosen_score', 'rejected', 'rejected_critique', 'rejected_score'],
        num_rows: 62966
    })
    test: Dataset({
        features: ['instruction', 'chosen', 'chosen_critique', 'chosen_score', 'rejected', 'rejected_critique', 'rejected_score'],
        num_rows: 1000
    })
})


Creating parquet from Arrow format: 100%|██████████| 63/63 [00:01<00:00, 60.04ba/s]
Pushing dataset shards to the dataset hub: 100%|██████████| 1/1 [00:06<00:00,  6.14s/it]
Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 52.77ba/s]
Pushing dataset shards to the dataset hub: 100%|██████████| 1/1 [00:00<00:00,  1.42it/s]


# Every Pair

In [20]:
new_dd = DatasetDict()
for k in dd:
    ds = dd[k]
    items = []

    for item in ds:
        completions = sorted(item["completions"], key=lambda x: x["overall_score"], reverse=True)
        if len(completions) < 2:
            continue
        
        for i, chosen in enumerate(completions[:-1]):
            for rejected in completions[i + 1:]:
                if abs(chosen["overall_score"] - rejected["overall_score"]) < 1.0:
                    continue

                items.append({
                    "instruction": item["instruction"],
                    "chosen": chosen["response"],
                    "chosen_critique": chosen["critique"],
                    "chosen_score": chosen["overall_score"],
                    "rejected": rejected["response"],
                    "rejected_critique": rejected["critique"],
                    "rejected_score": rejected["overall_score"],
                })
            chosen, rejected = completions[0], completions[-1]

            items.append({
                "instruction": item["instruction"],
                "chosen": chosen["response"],
                "chosen_critique": chosen["critique"],
                "chosen_score": chosen["overall_score"],
                "rejected": rejected["response"],
                "rejected_critique": rejected["critique"],
                "rejected_score": rejected["overall_score"],
            })

    new_dd[k] = Dataset.from_list(items)

In [21]:
print(new_dd)
# pprint(new_dd['train'][0])
# new_dd.push_to_hub("heegyu/Ultrafeedback-split-dpo-max-margin")

DatasetDict({
    train: Dataset({
        features: ['instruction', 'chosen', 'chosen_critique', 'chosen_score', 'rejected', 'rejected_critique', 'rejected_score'],
        num_rows: 436176
    })
    test: Dataset({
        features: ['instruction', 'chosen', 'chosen_critique', 'chosen_score', 'rejected', 'rejected_critique', 'rejected_score'],
        num_rows: 6892
    })
})


# Critique 생성

In [10]:
new_dd = DatasetDict()
for k in dd:
    ds = dd[k]
    items = []

    for item in ds:
        for response in item["completions"]:
            items.append({
                "instruction": item["instruction"],
                "output": response["response"],
                "critique": response["critique"],
                "overall_score": response["overall_score"]
            })

    new_dd[k] = Dataset.from_list(items)

In [11]:
new_dd

DatasetDict({
    train: Dataset({
        features: ['instruction', 'output', 'critique', 'overall_score'],
        num_rows: 251864
    })
    test: Dataset({
        features: ['instruction', 'output', 'critique', 'overall_score'],
        num_rows: 4000
    })
})

In [12]:
new_dd.push_to_hub("heegyu/Ultrafeedback-split-critiques")

Creating parquet from Arrow format: 100%|██████████| 126/126 [00:00<00:00, 130.86ba/s]
Creating parquet from Arrow format: 100%|██████████| 126/126 [00:00<00:00, 131.69ba/s]t]
Pushing dataset shards to the dataset hub: 100%|██████████| 2/2 [00:13<00:00,  6.78s/it]
Creating parquet from Arrow format: 100%|██████████| 4/4 [00:00<00:00, 122.60ba/s]
Pushing dataset shards to the dataset hub: 100%|██████████| 1/1 [00:00<00:00,  1.46it/s]
