In [5]:
from datasets import load_dataset, concatenate_datasets
import datasets
# Sub-folders (data_dir values) of Anthropic/hh-rlhf that get merged below.
subsets = ["helpful-base", "harmless-base", "helpful-online", "helpful-rejection-sampled"]

# Load every sub-folder and tag each row of each split with the sub-folder
# name, so the origin of a row is still known after concatenation.
all_datasets = []
for subset_name in subsets:
    subset = load_dataset("Anthropic/hh-rlhf", data_dir=subset_name)
    for split_name in list(subset.keys()):
        split_ds = subset[split_name]
        provenance = [subset_name] * len(split_ds)
        subset[split_name] = split_ds.add_column("source_dir", provenance)
    all_datasets.append(subset)

# Stack the per-subset splits into one train set and one test set.
train_parts = [subset["train"] for subset in all_datasets]
test_parts = [subset["test"] for subset in all_datasets]
train_full = concatenate_datasets(train_parts)
test_full = concatenate_datasets(test_parts)
full = datasets.DatasetDict({"train": train_full, "test": test_full})

# Sanity check: overall structure, and the provenance tag of the first row.
print(full)
print(train_full[0]["source_dir"])

DatasetDict({
    train: Dataset({
        features: ['chosen', 'rejected', 'source_dir'],
        num_rows: 160800
    })
    test: Dataset({
        features: ['chosen', 'rejected', 'source_dir'],
        num_rows: 8552
    })
})
helpful-base


In [6]:
# Working handles on the two merged splits; later cells rebind these names.
train: datasets.Dataset
test: datasets.Dataset
train, test = full["train"], full["test"]

# Row counts before any filtering.
print("total train examples:", train.num_rows)
print("total test examples:", test.num_rows)

total train examples: 160800
total test examples: 8552


In [7]:
import regex as re
def filter(example):
    """Return True when the chosen transcript is a single-turn exchange.

    A transcript qualifies when ``example["chosen"]`` contains exactly one
    line starting with "Human: " and exactly one line starting with
    "Assistant: ". Only the "chosen" field is inspected.

    Note: the name shadows the builtin ``filter``; it is kept because the
    rest of the notebook passes this function by that name.
    """
    transcript = example["chosen"]
    # Each speaker marker must occur exactly once, anchored at a line start.
    for marker in (r"^Human: ", r"^Assistant: "):
        occurrences = re.findall(marker, transcript, flags=re.MULTILINE)
        if len(occurrences) != 1:
            return False
    return True
# Drop every example whose chosen transcript is not a single-turn exchange.
train = train.filter(function=filter)
test = test.filter(function=filter)

# Row counts after filtering.
print("filtered train examples:", train.num_rows)
print("filtered test examples:", test.num_rows)

Filter:   0%|          | 0/160800 [00:00<?, ? examples/s]

Filter:   0%|          | 0/8552 [00:00<?, ? examples/s]

filtered train examples: 49386
filtered test examples: 2609


In [9]:
def extract_prompt_and_response(example):
    """Split a preference pair into its prompt and the two candidate replies.

    Reads ``example["chosen"]`` and ``example["rejected"]`` and returns a dict
    with three keys:

    * ``"prompt"`` - text between the first line-initial "Human: " and the
      following line-initial "Assistant: " in the chosen transcript,
    * ``"good"``   - everything after the first line-initial "Assistant: " in
      the chosen transcript,
    * ``"bad"``    - the same span taken from the rejected transcript.

    Every value is whitespace-stripped; a part that cannot be located becomes
    the empty string.

    Raises:
        AssertionError: if a prompt is found in the rejected transcript and it
            differs from the one extracted from the chosen transcript.
    """
    search_flags = re.MULTILINE | re.DOTALL
    prompt_pattern = r"^Human: (.+?)^Assistant: "
    response_pattern = r"^Assistant: (.+)$"

    def first_group(pattern, text):
        # Stripped first capture group, or None when the pattern is absent.
        found = re.search(pattern, text, flags=search_flags)
        return None if found is None else found.group(1).strip()

    chosen_text = example["chosen"]
    prompt = first_group(prompt_pattern, chosen_text) or ""
    resp_good = first_group(response_pattern, chosen_text) or ""

    rejected_text = example["rejected"]
    rejected_prompt = first_group(prompt_pattern, rejected_text)
    # The consistency check only runs when the rejected side has a prompt.
    if rejected_prompt is not None:
        assert rejected_prompt == prompt, "Prompt in chosen and rejected do not match!"
    resp_bad = first_group(response_pattern, rejected_text) or ""

    return {"prompt": prompt, "good": resp_good, "bad": resp_bad}
# Add the "prompt", "good" and "bad" columns to both splits.
train = train.map(function=extract_prompt_and_response)
test = test.map(function=extract_prompt_and_response)

Map:   0%|          | 0/49386 [00:00<?, ? examples/s]

Map:   0%|          | 0/2609 [00:00<?, ? examples/s]

In [12]:
import random
# Spot-check: print one randomly drawn training example. No seed is set, so
# each run shows a different row.
sample = random.choice(train)
# The value printed is the "source_dir" column, so the label says so
# (it previously read "output_dir:", which is not a column of the dataset).
print("source_dir:", sample["source_dir"])
print("Prompt:")
print(sample["prompt"])
print("\nGood Response:")
print(sample["good"])
print("\nBad Response:")
print(sample["bad"])

output_dir: helpful-online
Prompt:
Good morning! My cat won't cover his poop in the litter box. Is there anyway I can teach him to cover it?

Good Response:
Gosh, it does seem like a weird habit for a cat to have, and yes, there are some things you can try to encourage him. It is important to keep in mind that each cat has his own unique personality, and some are just more particular than others.  The most important thing is to try to make the litter area as stress-free as possible for your cat, since stress could potentially be a cause.  For example, you should make sure to thoroughly clean the litter box frequently, provide a high quality litter, and choose the proper location.  You might also consider providing more than one litter box to stretch out the time between changes.  You can also carefully observe the cat’s behavior and habits to come up with ways to encourage him to cover his poop.  For example, you could place a treat under the poop when he exits the litter box to encour

In [None]:
import torch
# Scratch check of gather along dim=1: pick one element per row of `a`.
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
# One column index per row, shaped (2, 1). Indices must be valid positions
# along dim=1 (0..2 here); an index of -1 is rejected as out of bounds.
b = torch.zeros((2, 1), dtype=torch.long)
print(a.size())
print(b.size())
print(torch.gather(a, 1, b))

torch.Size([2, 3])
torch.Size([2, 1])


RuntimeError: index -1 is out of bounds for dimension 1 with size 3