In [11]:
from datasets import load_dataset

# This will download the dataset and load it
tldr_dataset = load_dataset("CarperAI/openai_summarize_tldr")

# To see the available splits
print(tldr_dataset)

# To access the train split, for example:
tldr_train_data = tldr_dataset["train"]
print(train_data[0])


DatasetDict({
    train: Dataset({
        features: ['prompt', 'label'],
        num_rows: 116722
    })
    test: Dataset({
        features: ['prompt', 'label'],
        num_rows: 6553
    })
    valid: Dataset({
        features: ['prompt', 'label'],
        num_rows: 6447
    })
})
{'prompt': 'SUBREDDIT: r/relationships\nTITLE: To admit or not to admit snooping...\nPOST: I [25M] have snooped in the past and copped up to it to my gf [25F] of 6 years.  We talked it through.  It had been a year or two since the last time.  That\'s an issue I\'m working on.\n\nNow she has a new close male work friend.  I won\'t go into details, but she hides things from me with him and does other things to make me a bit suspicious.  So...I snooped again, and this time, all texts from her new friend have been deleted and I saw a google search for "how to get over a guy" near some searches of his name and views of his Facebook profile.\n\nI asked her about this guy, not mentioning the snooping, and she 

In [6]:

dataset = load_dataset("CarperAI/openai_summarize_comparisons")

# To see the available splits
print(dataset)

# To access the train split, for example:
summarize_train_data = dataset["train"]
print(train_data[0])

DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 92534
    })
    test: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 83629
    })
    valid1: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 33082
    })
    valid2: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 50715
    })
})
{'prompt': 'SUBREDDIT: r/relationships\nTITLE: To admit or not to admit snooping...\nPOST: I [25M] have snooped in the past and copped up to it to my gf [25F] of 6 years.  We talked it through.  It had been a year or two since the last time.  That\'s an issue I\'m working on.\n\nNow she has a new close male work friend.  I won\'t go into details, but she hides things from me with him and does other things to make me a bit suspicious.  So...I snooped again, and this time, all texts from her new friend have been deleted and I saw a google search for "how to get 

In [8]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-0.6B-Base"  # Replace with Qwen3 if available
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained(model_name)

In [17]:
# Print a few examples
for i in range(3):
    print(tldr_dataset["train"][i]["prompt"])
    print(tldr_dataset["train"][i]["label"])
    print("-" * 40)

SUBREDDIT: r/relationships
TITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting
POST: Not sure if this belongs here but it's worth a try. 

Backstory:
When I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. 

Now: Its been almost 3 years now and I've gotten better after counselling and mild anti depressants. My mother has been out of m

In [22]:
def preprocess_function(example):
    # You can add a prompt if you want, e.g., "Summarize: "
    input_text = example["prompt"]
    target_text = example["label"]
    model_inputs = tokenizer(
        input_text, max_length=512, truncation=True, padding="max_length"
    )
    labels = tokenizer(
        target_text, max_length=64, truncation=True, padding="max_length"
    )["input_ids"]
    model_inputs["labels"] = labels
    return model_inputs

tokenized_dataset = tldr_dataset["train"].map(preprocess_function, batched=False)

Map: 100%|██████████| 116722/116722 [01:34<00:00, 1230.75 examples/s]


In [1]:
import torch
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer


# Load model and tokenizer
model_name = "Qwen/Qwen1.5-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
device = torch.device("cpu")

model.to(device)

# Prepare a small dataset for demonstration
from datasets import load_dataset
dataset = load_dataset("CarperAI/openai_summarize_tldr", split="train[:1000]")  # Use a small subset for speed

def collate_fn(batch):
    # Concatenate prompt and label for each example
    input_texts = [x["prompt"] + "\nTL;DR: " + x["label"] for x in batch]
    # Tokenize the concatenated text
    encodings = tokenizer(
        input_texts,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128,
    )
    # For SFT, labels are the same as input_ids
    labels = encodings["input_ids"].clone()
    encodings = {k: v.to(device) for k, v in encodings.items()}
    labels = labels.to(device)
    return encodings, labels

dataloader = DataLoader(dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)

# Optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# Training loop
model.train()
for epoch in range(1):  # 1 epoch for demo
    for batch_idx, (inputs, labels) in enumerate(dataloader):
        outputs = model(**inputs, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if batch_idx % 10 == 0:
            print(f"Epoch {epoch} Batch {batch_idx} Loss: {loss.item():.4f}")

  from .autonotebook import tqdm as notebook_tqdm


Epoch 0 Batch 0 Loss: 3.8355
Epoch 0 Batch 10 Loss: 2.5739


KeyboardInterrupt: 

In [30]:
import transformers, accelerate
print(transformers.__version__)
print(accelerate.__version__)

4.53.2
1.8.1
